summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAnton Luka Šijanec <anton@sijanec.eu>2023-01-10 23:49:18 +0100
committerAnton Luka Šijanec <anton@sijanec.eu>2023-01-10 23:49:18 +0100
commit6be4125be5c6883504836a7fbc151730bdc38280 (patch)
tree394dec26f4f6d2df4d296a485b8584c9cc39fcfc /src
parentmetadl works, but wastes bandwidth (diff)
downloadtravnik-6be4125be5c6883504836a7fbc151730bdc38280.tar
travnik-6be4125be5c6883504836a7fbc151730bdc38280.tar.gz
travnik-6be4125be5c6883504836a7fbc151730bdc38280.tar.bz2
travnik-6be4125be5c6883504836a7fbc151730bdc38280.tar.lz
travnik-6be4125be5c6883504836a7fbc151730bdc38280.tar.xz
travnik-6be4125be5c6883504836a7fbc151730bdc38280.tar.zst
travnik-6be4125be5c6883504836a7fbc151730bdc38280.zip
Diffstat (limited to 'src')
-rw-r--r--src/dht.c252
1 files changed, 202 insertions, 50 deletions
diff --git a/src/dht.c b/src/dht.c
index 467b036..137c5d8 100644
--- a/src/dht.c
+++ b/src/dht.c
@@ -82,7 +82,7 @@ struct node {
struct sockaddr_in6 addr;
int unanswered; /**< number of packets I've sent since last_received */
time_t last_received; /**< time when I received the last packet from it */
- time_t last_sent; /**< time when I sent the last query to it */
+ time_t last_sent; /**< time when I sent the last query to it. not incremented if it has unanswered queries. */
struct node * next;
};
@@ -115,6 +115,46 @@ void node_free (struct node * n) {
free(n);
}
+enum node_grade {
+ good,
+ bad,
+ questionable
+};
+
+/**
+ * @return a textual form of node_grade enum
+ */
+
+char * node_grade_str (enum node_grade g) {
+ switch (g) {
+ case good:
+ return "good";
+ case bad:
+ return "bad";
+ case questionable:
+ return "questionable";
+ }
+ return "unknown node_grade";
+}
+
+/**
+ * determines if node is considered good, bad or questionable
+ *
+ * @param n [in] node
+ * @return enum node_grade
+ */
+
+#define QUESTIONABLE_AFTER (15*60)
+
+enum node_grade node_grade (const struct node * n) {
+ if (n->last_received + QUESTIONABLE_AFTER < seconds()) {
+ if (n->last_sent + 60 < seconds() && n->unanswered > 1)
+ return bad;
+ return questionable;
+ }
+ return good;
+}
+
/**
* print node to stream, for debugging
*
@@ -129,7 +169,7 @@ void node_print (FILE * s, const struct node * n) {
char remote[INET6_ADDRSTRLEN + 64];
if (!inet_ntop(n->addr.sin6_family, n->addr.sin6_addr.s6_addr, remote, INET6_ADDRSTRLEN+7))
snprintf(remote, sizeof remote, "(inet_ntop: %s)", strerror(errno));
- fprintf(s, "%s %s/%d unans=%d", buf, remote, ntohs(n->addr.sin6_port), n->unanswered);
+ fprintf(s, "%s %s/%d unans=%d %s", buf, remote, ntohs(n->addr.sin6_port), n->unanswered, node_grade_str(node_grade(n)));
}
/**
@@ -478,6 +518,10 @@ struct dht {
unsigned char sample[60000]; /**< for sample infohashes */
int samples; /**< for sample infohashes, max 3000 */
#endif
+ unsigned p; /**< number of sent pings */
+#define PINGS_CAP 256 /**< capacity of circular buffer, one element is ~28 bytes, so this is 7168 B */
+ struct sockaddr_in6 pings[PINGS_CAP]; /**< circular buffer of recent pings */
+ unsigned periods; /**< number of times periodic() was called */
};
/**
@@ -494,7 +538,7 @@ void dht_print (FILE * s, const struct dht * d) {
char secret[17*2];
secret[17*2+1] = '\0';
bin2hex(secret, d->secret, 16);
- fprintf(s, "id=%s socket=%d t=%u p=%u tmax=%u pmax=%u p/t-max=%u runsec=%ld rxp=%u txp=%u rxb=%u txb=%u secret=%s tt=%u tr=%u\n", buf, d->socket, d->torrents_num, d->peers_num, d->torrents_max, d->peers_max, d->peers_per_torrent_max, seconds()-d->time, d->rxp, d->txp, d->rxb, d->txb, secret, d->tt, d->tr);
+ fprintf(s, "id=%s socket=%d t=%u p=%u tmax=%u pmax=%u p/t-max=%u runsec=%ld rxp=%u txp=%u rxb=%u txb=%u secret=%s tt=%u tr=%u p=%u\n", buf, d->socket, d->torrents_num, d->peers_num, d->torrents_max, d->peers_max, d->peers_per_torrent_max, seconds()-d->time, d->rxp, d->txp, d->rxb, d->txb, secret, d->tt, d->tr, d->p);
fprintf(s, "**** NODES ****\n");
int nodes = 0;
for (int i = 0; i <= 1; i++) {
@@ -516,7 +560,7 @@ void dht_print (FILE * s, const struct dht * d) {
}
b = b->next;
}
- fprintf(s, "\t**** COUNT OF %s BUCKETS: %d\n", i ? "IPv4" : "IPV6", buckets);
+ fprintf(s, "\t**** COUNT OF %s BUCKETS: %d\n", i ? "IPv6" : "IPv4", buckets);
}
fprintf(s, "**** COUNT OF NODES: %d\n", nodes);
printf("**** TORRENTS ****\n");
@@ -644,6 +688,8 @@ void find_node (struct dht * d, const struct sockaddr_in6 * addr, const unsigned
*
* instead of sending a ping query, we send a find_node query. this gets us useful information of peers around our ID instead of just a blank ping reply. infolgedessen we don't have to actively search for our neighbour nodes, since we'll get them through pings anyways
*
+ * does not ping if the same node was pinged in the last PINGS_CAP pings
+ *
* DEV THOUGHT: instead of sending a find_node for an ID close to ours, we could send a find_node for a random ID far from us. though those buckets will probably quickly be filled by torrent searches.
*
* @param d [in] library handle
@@ -651,6 +697,11 @@ void find_node (struct dht * d, const struct sockaddr_in6 * addr, const unsigned
*/
void ping_node (struct dht * d, const struct sockaddr_in6 * a) {
+ for (int i = 0; i < PINGS_CAP; i++)
+ if (!memcmp(a, &d->pings[i], sizeof *a)) {
+ L(debug, d, "already pinged in last " STR(PINGS_CAP) " pings, ignoring request");
+ return;
+ }
unsigned char target[20];
memcpy(target, d->id, 20);
if (target[19] & 1) // flip the last bit, so the other node doesn't just return
@@ -1077,28 +1128,6 @@ unsigned int distance (const unsigned char * a, const unsigned char * b) {
return r;
}
-enum node_grade {
- good,
- bad,
- questionable
-};
-
-/**
- * determines if node is considered good, bad or questionable
- *
- * @param n [in] node
- * @return enum node_grade
- */
-
-enum node_grade node_grade (const struct node * n) {
- if (n->last_received + 15*60 < seconds()) {
- if (n->last_sent + 14*60 < seconds() && n->unanswered > 1)
- return bad;
- return questionable;
- }
- return good;
-}
-
/**
* returns 1 if bucket is perfect, meaning it is fresh, has K nodes, and all nodes are good. bucket that contains id is almost never perfect, as it can usually be split into smaller buckets, that's why param d is required to get own id
*
@@ -1167,21 +1196,44 @@ void replied (const struct dht * d, const unsigned char * id, const struct socka
struct node * node = node_init();
memcpy(&node->addr, addr, sizeof *addr);
memcpy(node->id, id, 20);
- if (!n) {
- node->next = b->nodes;
- b->nodes = node;
- return;
- }
if (node_count(b->nodes) < K) {
- struct node * index = b->nodes;
- while (index->next && memcmp(node->id, index->next->id, 20) > 1)
- index = index->next;
- node->next = index->next;
- index->next = node;
+ if (!n) {
+ node->next = b->nodes;
+ b->nodes = node;
+ } else {
+ node->next = n->next;
+ n->next = node;
+ }
return;
}
node_free(node);
if (in_bucket(d->id, b)) {
+ struct bucket * bucket = d->buckets;
+ if (family(addr->sin6_addr.s6_addr) == AF_INET6)
+ bucket = d->buckets6;
+ while (bucket) {
+ struct node * n = bucket->nodes;
+ while (n) {
+ char remote[INET_ADDRSTRLEN + INET6_ADDRSTRLEN + 64];
+ if (!inet_ntop(AF_INET6, addr->sin6_addr.s6_addr, remote, INET6_ADDRSTRLEN+7+INET_ADDRSTRLEN))
+ snprintf(remote, sizeof remote, "(inet_ntop: %s)", strerror(errno));
+ switch(family(addr->sin6_addr.s6_addr)) {
+ case AF_INET:
+ if (!memcmp(addr->sin6_addr.s6_addr+12, n->addr.sin6_addr.s6_addr+12, 4)) {
+ L(disagreement, d, "sybil: %s/%d is already present", remote, ntohs(addr->sin6_port));
+ return;
+ }
+ break;
+ case AF_INET6:
+ if (!memcmp(addr->sin6_addr.s6_addr+1, n->addr.sin6_addr.s6_addr+1, 5)) {
+ L(disagreement, d, "sybil: %s/%d is already present", remote, ntohs(addr->sin6_port));
+ }
+ break;
+ }
+ n = n->next;
+ }
+ bucket = bucket->next;
+ }
split(b);
replied(d, id, addr); // find bucket again
}
@@ -1936,10 +1988,10 @@ void handle (struct dht * d, char * pkt, int len, struct sockaddr_in6 addr) {
struct bencoding * nodes = bpath(b, "r/nodes");
struct bencoding * nodes6 = bpath(b, "r/nodes6");
if (nodes && nodes->type & string && !(nodes->valuelen % 26))
- for (unsigned i = 0; i < nodes->valuelen; i += 26)
+ for (unsigned i = 0; i < MIN(nodes->valuelen, K); i += 26)
compact(d, nodes->value+i, 26, torrent);
if (nodes6 && nodes6->type & string && !(nodes6->valuelen % 38))
- for (unsigned i = 0; i < nodes6->valuelen; i += 38)
+ for (unsigned i = 0; i < MIN(nodes6->valuelen, K); i += 38)
compact(d, nodes6->value+i, 38, torrent);
break;
case 'E':
@@ -2068,16 +2120,24 @@ d:
} // do not log, it may have been a bencoded reply
}
+#define PERIODIC 10
+
/**
- * do periodic housekeeping on the routing table LL, making sure no nodes are bad. removes bad nodes and does not ping questionable nodes. see NOTE03
+ * do periodic housekeeping on the routing table LL, making sure no nodes are bad. removes bad nodes. detects sybil. see NOTE03
*
- * @param b [in] first bucket in LL - basically either d->buckets or d->buckets6
+ * @param d [in] library handle
+ * @param fam [in] AF_INET for buckets and AF_INET6 for buckets6
* @return number of good nodes
*/
-int refresh (struct bucket * b) {
+int refresh (struct dht * d, int fam) {
int nrgood = 0;
+ int buckets = 0;
+ struct bucket * b = d->buckets;
+ if (fam == AF_INET6)
+ b = d->buckets6;
while (b) {
+ buckets++;
struct node ** n = &b->nodes;
while (*n) {
switch (node_grade(*n)) {
@@ -2088,8 +2148,9 @@ int refresh (struct bucket * b) {
node_free(old);
continue;
case questionable:
- // ping_node(d, *n); // NOTE03 about not pinging questionable nodes: this ensures a constant regeneration of the routing table. this is just an idea, if the client frequently gets in a situation without any nodes in the routing table, remove the comment before ping_node call.
- break;
+ if (!(rand() % (QUESTIONABLE_AFTER/PERIODIC)))
+ ping_node(d, &(*n)->addr); // NOTE03 about not pinging questionable nodes: this ensures a constant regeneration of the routing table. this is just an idea, if the client frequently gets in a situation without any nodes in the routing table, remove the comment before ping_node call.
+ break; // update on why I uncommented: to mitigate sybil attack, it's baje important to prefer old nodes
case good:
nrgood++;
break;
@@ -2098,11 +2159,40 @@ int refresh (struct bucket * b) {
}
b = b->next;
}
+ if (buckets > 32) { // sybil attack - node is broken - clear whole routing table, keeping one bucket
+ L(disagreement, d, "@@@@@@ SYBIL ATTACK - CLEARING ROUTING TABLE @@@@@@");
+ int keep_first = rand() % 2; // should we even keep one bucket? the sybil node has a 1/2
+ if (keep_first) { // chance of having stared in the bucket farthest away, so it's stored there ...
+ memset(d->buckets->id, '\0', 20);
+ b = d->buckets->next;
+ d->buckets->next = NULL;
+ while (b) {
+ bucket_free(b);
+ b = b->next;
+ }
+ } else {
+ b = d->buckets;
+ while (b->next) {
+ struct bucket * old = b;
+ b = b->next;
+ bucket_free(old);
+ }
+ d->buckets = b;
+ memset(d->buckets->id, '\0', 20);
+ }
+ if (getrandom(d->id, 20, GRND_NONBLOCK) == -1) // changing our ID. note that this might make
+ L(std_fail, d, "getrandom: %s", strerror(errno)); // existing nodes hate us
+ switch (fam) {
+ case AF_INET:
+ return node_count(d->buckets6->nodes);
+ case AF_INET6:
+ return node_count(d->buckets6->nodes);
+ }
+ return 0;
+ }
return nrgood;
}
-#define PERIODIC 10
-
/**
* does periodic work for the library
*
@@ -2124,11 +2214,12 @@ int refresh (struct bucket * b) {
*/
void periodic (struct dht * d) {
+ d->periods++;
L(debug, d, "called");
int dns = 0;
- if (!refresh(d->buckets))
+ if (!refresh(d, AF_INET))
dns++;
- if (!refresh(d->buckets6))
+ if (!refresh(d, AF_INET6))
dns++;
if (dns) {
char packet[512];
@@ -2187,12 +2278,71 @@ void periodic (struct dht * d) {
struct torrent * t = d->torrents;
while (t) {
if (t->type & (peers | announce)) {
+ /*
+ struct node * n = t->nodes;
+ int c = node_count(n);
+ if (!c) {
+#define RTGP(buckets) {struct bucket * b = d->buckets; \
+ find(t->hash, &b, NULL); \
+ struct node * n = b->nodes; \
+ c = node_count(n); \
+ if (c) { \
+ c = rand() % c; \
+ while (c--) \
+ n = n->next; \
+ if (!n->unanswered) \
+ n->last_sent = seconds(); \
+ n->unanswered++; \
+ get_peers(d, &n->addr, t->hash); \
+ } else { \
+ struct bucket * b = d->buckets; \
+ c = 0; \
+ while (b) { \
+ c += node_count(b->nodes); \
+ b = b->next; \
+ } \
+ if (c) { \
+ c = rand() % c; \
+ b = d->buckets; \
+ int i = 0; \
+ while (b) { \
+ struct node * n = b->nodes; \
+ while (n) { \
+ if (i++ == c) { \
+ i = -1; \
+ if (!n->unanswered) \
+ n->last_sent = seconds(); \
+ n->unanswered++; \
+ get_peers(d, &n->addr, t->hash); \
+ break; \
+ } \
+ n = n->nodes; \
+ } \
+ if (i == -1) \
+ break; \
+ b = b->next; \
+ } \
+ } \
+ }
+ RTGP(buckets);
+ RTGP(buckets6);
+ } else {
+ c = rand() % c;
+ while (c--)
+ n = n->next;
+ if (!n->unanswered)
+ n->last_sent = seconds();
+ n->unanswered++;
+ get_peers(d, &n->addr, t->hash);
+ }
+ */
struct node * n = t->nodes;
int sent = 0;
while (n) {
sent++;
+ if (!n->unanswered)
+ n->last_sent = seconds();
n->unanswered++;
- n->last_sent = seconds();
get_peers(d, &n->addr, t->hash);
n = n->next;
}
@@ -2202,8 +2352,9 @@ void periodic (struct dht * d) {
struct node * n = b->nodes; \
while (sent < K && n) { \
sent++; \
+ if (!n->unanswered) \
+ n->last_sent = seconds(); \
n->unanswered++; \
- n->last_sent = seconds(); \
get_peers(d, &n->addr, t->hash); \
n = n->next; \
}}
@@ -2216,8 +2367,9 @@ void periodic (struct dht * d) {
n = b->nodes;
while (sent < K && n) {
sent++;
+ if (!n->unanswered)
+ n->last_sent = seconds();
n->unanswered++;
- n->last_sent = seconds();
get_peers(d, &n->addr, t->hash);
n = n->next;
}