From 6be4125be5c6883504836a7fbc151730bdc38280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Luka=20=C5=A0ijanec?= Date: Tue, 10 Jan 2023 23:49:18 +0100 Subject: attempts to fix ping flood and sybil attacks --- src/dht.c | 252 +++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 202 insertions(+), 50 deletions(-) (limited to 'src') diff --git a/src/dht.c b/src/dht.c index 467b036..137c5d8 100644 --- a/src/dht.c +++ b/src/dht.c @@ -82,7 +82,7 @@ struct node { struct sockaddr_in6 addr; int unanswered; /**< number of packets I've sent since last_received */ time_t last_received; /**< time when I received the last packet from it */ - time_t last_sent; /**< time when I sent the last query to it */ + time_t last_sent; /**< time when I sent the last query to it. not incremented if it has unanswered queries. */ struct node * next; }; @@ -115,6 +115,46 @@ void node_free (struct node * n) { free(n); } +enum node_grade { + good, + bad, + questionable +}; + +/** + * @return a textual form of node_grade enum + */ + +char * node_grade_str (enum node_grade g) { + switch (g) { + case good: + return "good"; + case bad: + return "bad"; + case questionable: + return "questionable"; + } + return "unknown node_grade"; +} + +/** + * determines if node is considered good, bad or questionable + * + * @param n [in] node + * @return enum node_grade + */ + +#define QUESTIONABLE_AFTER (15*60) + +enum node_grade node_grade (const struct node * n) { + if (n->last_received + QUESTIONABLE_AFTER < seconds()) { + if (n->last_sent + 60 < seconds() && n->unanswered > 1) + return bad; + return questionable; + } + return good; +} + /** * print node to stream, for debugging * @@ -129,7 +169,7 @@ void node_print (FILE * s, const struct node * n) { char remote[INET6_ADDRSTRLEN + 64]; if (!inet_ntop(n->addr.sin6_family, n->addr.sin6_addr.s6_addr, remote, INET6_ADDRSTRLEN+7)) snprintf(remote, sizeof remote, "(inet_ntop: %s)", strerror(errno)); - fprintf(s, "%s %s/%d unans=%d", buf, remote, ntohs(n->addr.sin6_port), n->unanswered); + fprintf(s, "%s %s/%d unans=%d %s", buf, remote, ntohs(n->addr.sin6_port), n->unanswered, node_grade_str(node_grade(n))); } /** @@ -478,6 +518,10 @@ struct dht { unsigned char sample[60000]; /**< for sample infohashes */ int samples; /**< for sample infohashes, max 3000 */ #endif + unsigned p; /**< number of sent pings */ +#define PINGS_CAP 256 /**< capacity of circular buffer, one element is ~28 bytes, so this is 7168 B */ + struct sockaddr_in6 pings[PINGS_CAP]; /**< circular buffer of recent pings */ + unsigned periods; /**< number of times periodic() was called */ }; /** @@ -494,7 +538,7 @@ void dht_print (FILE * s, const struct dht * d) { char secret[17*2]; secret[17*2+1] = '\0'; bin2hex(secret, d->secret, 16); - fprintf(s, "id=%s socket=%d t=%u p=%u tmax=%u pmax=%u p/t-max=%u runsec=%ld rxp=%u txp=%u rxb=%u txb=%u secret=%s tt=%u tr=%u\n", buf, d->socket, d->torrents_num, d->peers_num, d->torrents_max, d->peers_max, d->peers_per_torrent_max, seconds()-d->time, d->rxp, d->txp, d->rxb, d->txb, secret, d->tt, d->tr); + fprintf(s, "id=%s socket=%d t=%u p=%u tmax=%u pmax=%u p/t-max=%u runsec=%ld rxp=%u txp=%u rxb=%u txb=%u secret=%s tt=%u tr=%u p=%u\n", buf, d->socket, d->torrents_num, d->peers_num, d->torrents_max, d->peers_max, d->peers_per_torrent_max, seconds()-d->time, d->rxp, d->txp, d->rxb, d->txb, secret, d->tt, d->tr, d->p); fprintf(s, "**** NODES ****\n"); int nodes = 0; for (int i = 0; i <= 1; i++) { @@ -516,7 +560,7 @@ void dht_print (FILE * s, const struct dht * d) { } b = b->next; } - fprintf(s, "\t**** COUNT OF %s BUCKETS: %d\n", i ? "IPv4" : "IPV6", buckets); + fprintf(s, "\t**** COUNT OF %s BUCKETS: %d\n", i ? "IPv6" : "IPv4", buckets); } fprintf(s, "**** COUNT OF NODES: %d\n", nodes); printf("**** TORRENTS ****\n"); @@ -644,6 +688,8 @@ void find_node (struct dht * d, const struct sockaddr_in6 * addr, const unsigned * * instead of sending a ping query, we send a find_node query. this gets us useful information of peers around our ID instead of just a blank ping reply. infolgedessen we don't have to actively search for our neighbour nodes, since we'll get them through pings anyways * + * does not ping if the same node was pinged in the last PINGS_CAP pings + * * DEV THOUGHT: instead of sending a find_node for an ID close to ours, we could send a find_node for a random ID far from us. though those buckets will probably quickly be filled by torrent searches. * * @param d [in] library handle @@ -651,6 +697,11 @@ void find_node (struct dht * d, const struct sockaddr_in6 * addr, const unsigned */ void ping_node (struct dht * d, const struct sockaddr_in6 * a) { + for (int i = 0; i < PINGS_CAP; i++) + if (!memcmp(a, &d->pings[i], sizeof *a)) { + L(debug, d, "already pinged in last " STR(PINGS_CAP) " pings, ignoring request"); + return; + } unsigned char target[20]; memcpy(target, d->id, 20); if (target[19] & 1) // flip the last bit, so the other node doesn't just return @@ -1077,28 +1128,6 @@ unsigned int distance (const unsigned char * a, const unsigned char * b) { return r; } -enum node_grade { - good, - bad, - questionable -}; - -/** - * determines if node is considered good, bad or questionable - * - * @param n [in] node - * @return enum node_grade - */ - -enum node_grade node_grade (const struct node * n) { - if (n->last_received + 15*60 < seconds()) { - if (n->last_sent + 14*60 < seconds() && n->unanswered > 1) - return bad; - return questionable; - } - return good; -} - /** * returns 1 if bucket is perfect, meaning it is fresh, has K nodes, and all nodes are good. bucket that contains id is almost never perfect, as it can usually be split into smaller buckets, that's why param d is required to get own id * @@ -1167,21 +1196,44 @@ void replied (const struct dht * d, const unsigned char * id, const struct socka struct node * node = node_init(); memcpy(&node->addr, addr, sizeof *addr); memcpy(node->id, id, 20); - if (!n) { - node->next = b->nodes; - b->nodes = node; - return; - } if (node_count(b->nodes) < K) { - struct node * index = b->nodes; - while (index->next && memcmp(node->id, index->next->id, 20) > 1) - index = index->next; - node->next = index->next; - index->next = node; + if (!n) { + node->next = b->nodes; + b->nodes = node; + } else { + node->next = n->next; + n->next = node; + } return; } node_free(node); if (in_bucket(d->id, b)) { + struct bucket * bucket = d->buckets; + if (family(addr->sin6_addr.s6_addr) == AF_INET6) + bucket = d->buckets6; + while (bucket) { + struct node * n = bucket->nodes; + while (n) { + char remote[INET_ADDRSTRLEN + INET6_ADDRSTRLEN + 64]; + if (!inet_ntop(AF_INET6, addr->sin6_addr.s6_addr, remote, INET6_ADDRSTRLEN+7+INET_ADDRSTRLEN)) + snprintf(remote, sizeof remote, "(inet_ntop: %s)", strerror(errno)); + switch(family(addr->sin6_addr.s6_addr)) { + case AF_INET: + if (!memcmp(addr->sin6_addr.s6_addr+12, n->addr.sin6_addr.s6_addr+12, 4)) { + L(disagreement, d, "sybil: %s/%d is already present", remote, ntohs(addr->sin6_port)); + return; + } + break; + case AF_INET6: + if (!memcmp(addr->sin6_addr.s6_addr+1, n->addr.sin6_addr.s6_addr+1, 5)) { + L(disagreement, d, "sybil: %s/%d is already present", remote, ntohs(addr->sin6_port)); + } + break; + } + n = n->next; + } + bucket = bucket->next; + } split(b); replied(d, id, addr); // find bucket again } @@ -1936,10 +1988,10 @@ void handle (struct dht * d, char * pkt, int len, struct sockaddr_in6 addr) { struct bencoding * nodes = bpath(b, "r/nodes"); struct bencoding * nodes6 = bpath(b, "r/nodes6"); if (nodes && nodes->type & string && !(nodes->valuelen % 26)) - for (unsigned i = 0; i < nodes->valuelen; i += 26) + for (unsigned i = 0; i < MIN(nodes->valuelen, K); i += 26) compact(d, nodes->value+i, 26, torrent); if (nodes6 && nodes6->type & string && !(nodes6->valuelen % 38)) - for (unsigned i = 0; i < nodes6->valuelen; i += 38) + for (unsigned i = 0; i < MIN(nodes6->valuelen, K); i += 38) compact(d, nodes6->value+i, 38, torrent); break; case 'E': @@ -2068,16 +2120,24 @@ d: } // do not log, it may have been a bencoded reply } +#define PERIODIC 10 + /** - * do periodic housekeeping on the routing table LL, making sure no nodes are bad. removes bad nodes and does not ping questionable nodes. see NOTE03 + * do periodic housekeeping on the routing table LL, making sure no nodes are bad. removes bad nodes. detects sybil. see NOTE03 * - * @param b [in] first bucket in LL - basically either d->buckets or d->buckets6 + * @param d [in] library handle + * @param fam [in] AF_INET for buckets and AF_INET6 for buckets6 * @return number of good nodes */ -int refresh (struct bucket * b) { +int refresh (struct dht * d, int fam) { int nrgood = 0; + int buckets = 0; + struct bucket * b = d->buckets; + if (fam == AF_INET6) + b = d->buckets6; while (b) { + buckets++; struct node ** n = &b->nodes; while (*n) { switch (node_grade(*n)) { @@ -2088,8 +2148,9 @@ int refresh (struct bucket * b) { node_free(old); continue; case questionable: - // ping_node(d, *n); // NOTE03 about not pinging questionable nodes: this ensures a constant regeneration of the routing table. this is just an idea, if the client frequently gets in a situation without any nodes in the routing table, remove the comment before ping_node call. - break; + if (!(rand() % (QUESTIONABLE_AFTER/PERIODIC))) + ping_node(d, &(*n)->addr); // NOTE03 about not pinging questionable nodes: this ensures a constant regeneration of the routing table. this is just an idea, if the client frequently gets in a situation without any nodes in the routing table, remove the comment before ping_node call. + break; // update on why I uncommented: to mitigate sybil attack, it's baje important to prefer old nodes case good: nrgood++; break; @@ -2098,11 +2159,40 @@ int refresh (struct bucket * b) { } b = b->next; } + if (buckets > 32) { // sybil attack - node is broken - clear whole routing table, keeping one bucket + L(disagreement, d, "@@@@@@ SYBIL ATTACK - CLEARING ROUTING TABLE @@@@@@"); + int keep_first = rand() % 2; // should we even keep one bucket? the sybil node has a 1/2 + if (keep_first) { // chance of having stared in the bucket farthest away, so it's stored there ... + memset(d->buckets->id, '\0', 20); + b = d->buckets->next; + d->buckets->next = NULL; + while (b) { + bucket_free(b); + b = b->next; + } + } else { + b = d->buckets; + while (b->next) { + struct bucket * old = b; + b = b->next; + bucket_free(old); + } + d->buckets = b; + memset(d->buckets->id, '\0', 20); + } + if (getrandom(d->id, 20, GRND_NONBLOCK) == -1) // changing our ID. note that this might make + L(std_fail, d, "getrandom: %s", strerror(errno)); // existing nodes hate us + switch (fam) { + case AF_INET: + return node_count(d->buckets6->nodes); + case AF_INET6: + return node_count(d->buckets6->nodes); + } + return 0; + } return nrgood; } -#define PERIODIC 10 - /** * does periodic work for the library * @@ -2124,11 +2214,12 @@ int refresh (struct bucket * b) { */ void periodic (struct dht * d) { + d->periods++; L(debug, d, "called"); int dns = 0; - if (!refresh(d->buckets)) + if (!refresh(d, AF_INET)) dns++; - if (!refresh(d->buckets6)) + if (!refresh(d, AF_INET6)) dns++; if (dns) { char packet[512]; @@ -2187,12 +2278,71 @@ void periodic (struct dht * d) { struct torrent * t = d->torrents; while (t) { if (t->type & (peers | announce)) { + /* + struct node * n = t->nodes; + int c = node_count(n); + if (!c) { +#define RTGP(buckets) {struct bucket * b = d->buckets; \ + find(t->hash, &b, NULL); \ + struct node * n = b->nodes; \ + c = node_count(n); \ + if (c) { \ + c = rand() % c; \ + while (c--) \ + n = n->next; \ + if (!n->unanswered) \ + n->last_sent = seconds(); \ + n->unanswered++; \ + get_peers(d, &n->addr, t->hash); \ + } else { \ + struct bucket * b = d->buckets; \ + c = 0; \ + while (b) { \ + c += node_count(b->nodes); \ + b = b->next; \ + } \ + if (c) { \ + c = rand() % c; \ + b = d->buckets; \ + int i = 0; \ + while (b) { \ + struct node * n = b->nodes; \ + while (n) { \ + if (i++ == c) { \ + i = -1; \ + if (!n->unanswered) \ + n->last_sent = seconds(); \ + n->unanswered++; \ + get_peers(d, &n->addr, t->hash); \ + break; \ + } \ + n = n->nodes; \ + } \ + if (i == -1) \ + break; \ + b = b->next; \ + } \ + } \ + } + RTGP(buckets); + RTGP(buckets6); + } else { + c = rand() % c; + while (c--) + n = n->next; + if (!n->unanswered) + n->last_sent = seconds(); + n->unanswered++; + get_peers(d, &n->addr, t->hash); + } + */ struct node * n = t->nodes; int sent = 0; while (n) { sent++; + if (!n->unanswered) + n->last_sent = seconds(); n->unanswered++; - n->last_sent = seconds(); get_peers(d, &n->addr, t->hash); n = n->next; } @@ -2202,8 +2352,9 @@ void periodic (struct dht * d) { struct node * n = b->nodes; \ while (sent < K && n) { \ sent++; \ + if (!n->unanswered) \ + n->last_sent = seconds(); \ n->unanswered++; \ - n->last_sent = seconds(); \ get_peers(d, &n->addr, t->hash); \ n = n->next; \ }} @@ -2216,8 +2367,9 @@ void periodic (struct dht * d) { n = b->nodes; while (sent < K && n) { sent++; + if (!n->unanswered) + n->last_sent = seconds(); n->unanswered++; - n->last_sent = seconds(); get_peers(d, &n->addr, t->hash); n = n->next; } -- cgit v1.2.3