From 8c7fd0a0b2e1abc72d67a1bd3a838054b3938f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Luka=20=C5=A0ijanec?= Date: Wed, 23 Nov 2022 13:39:56 +0100 Subject: bencoding --- .gitignore | 1 + .gitmodules | 6 ++ README.md | 17 +--- src/bencoding.c | 267 ++++++++++++++++++++++++++++++++++++++++-------------- src/dht.c | 4 + utils/bencoding.c | 14 ++- 6 files changed, 223 insertions(+), 86 deletions(-) create mode 100644 .gitmodules diff --git a/.gitignore b/.gitignore index 9433c58..d08bd07 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ test-eval a.out core .gdb_history +tiny-AES-c/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..3039ba6 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "vendor"] + path = vendor + url = http://ni.sijanec.eu/anonymous/tiny-AES-c/ +[submodule "tiny-AES-c"] + path = tiny-AES-c + url = http://ni.sijanec.eu/anonymous/tiny-AES-c diff --git a/README.md b/README.md index db218ef..6f4cf70 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,2 @@ -# travnik - NOT IMPLEMENTED YET, COME BACK LATER! - -... is a tool that connects to the bittorent dht network and waits for infohashes of torrents, fetches their metadata, stores it in a database and indexes them via a web-interface. It's meant to be a lighter and simpler alternative to [btdig.com's erlang crawler](https://btdig.com). - -travnik operates single-threadedly, including the BEP-5 (DHT), BEP-9 (metadata exchange) and HTTP client. - -travnik implements BEP-3 (bencoding), BEP-5 (DHT) and BEP-9 (metadata exchange) itself, other things (mysql client and http server) are handled by libraries. - -## requirements - -`build-essential`, `libmicrohttpd-dev`, `default-libmysqlclient-dev` - -## installation - -other users compile from source with `make`. +# external libraries +* https://github.com/kokke/tiny-AES-c diff --git a/src/bencoding.c b/src/bencoding.c index d1ac517..801c5b6 100644 --- a/src/bencoding.c +++ b/src/bencoding.c @@ -8,13 +8,12 @@ enum benc { num = 1 << 1, list = 1 << 2, dict = 1 << 3, - terminate = 1 << 4 /**< bencoding strings are terminated and you do not need bencoding_string to use them. breaks input str. - note: when out of space, the terminator is placed instead of the last character of the string. **/ + replace = 1 << 4 /**< replace existing element with same key when using binsert() instead of prepending the new element before the old. see binsert() docs. */ }; /** * structure representation of bencoded data - * the structure does not copy any data, it's assumed that the origin string that was used to create the structure does not change + * the structure copies strings */ struct bencoding { @@ -24,14 +23,11 @@ struct bencoding { struct bencoding * parent; enum benc type; /**< type | opts of this element */ struct bencoding * key; /**< the key element, string according to the spec, applicable for dict */ - char * value; /**< set to the content of the element, value is not null terminated unless terminate opt is set. NULL for dict and list. */ - size_t valuelen; /**< length of string value, as value is not null terminated, internal value for list or dict. */ + char * value; /**< set to the content of the element. \0 terminated. NULL for dict and list. */ + size_t valuelen; /**< length of string value. */ long int intvalue; int index; - char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */ - char oldterminatorls; /**< when opts&terminate when there was no more space, replaced character is stored here. - if there'd be enough space, the next one of this one would be replaced. - this is used by bencoding string, as it will repair the original string and restore the last character. */ + const char * after; /**< internal, set to character after this bencoded element in input string, used by recursive bdecode */ }; /** @@ -44,10 +40,144 @@ void free_bencoding (struct bencoding * b) { free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */ free_bencoding(b->key); /* should this be an element of a dict, free the key */ free_bencoding(b->next); + free(b->value); free(b); return; } +/** + * compares two bencoding elements. used in binsert. elements with different types are always different. + * + * @param a [in] if this one is higher, -1 is returned + * @param b [in] if this one is higher, 1 is returned + * @return -1 if a is higher, 1 if b is higher, 0 if they are the same + */ + +int bcompare (struct bencoding * a, struct bencoding * b) { + if (!a && !b) + return 0; + if (!a && b) + return -1; + if (a && !b) + return 1; + if (a->type & (num | string | list | dict) < b->type (num | string | list | dict)) + return -1; + if (a->type & (num | string | list | dict) > b->type (num | string | list | dict)) + return 1; + int ret = bcompare(a->key, b->key); + if (ret) + return ret; + if (a->type & num) { + if (a->intvalue != b->intvalue) + return a->intvalue < b->intvalue ? -1 : 1; + else + return 0; + } + if (a->type & string) { + if (a->valuelen != b->valuelen) + return a->valuelen < b->valuelen ? -1 : 1; + else + return memcmp(a->value, b->value, a->valuelen); + } + if (!a->child && b->child) + return -1; + if (a->child && !b->child) + return 1; + a = a->child; + b = b->child; + if (a->value & (list | dict)) { + while (1) { + if (!a && !b) + return 0; + int ret = bcompare(a, b); + if (ret) + return ret; + a = a->next; + b = b->next; + } + } +} + +/** + * insert into bencoding dict or list. if key already exists, it's prepended to the already existing key, unless opts has replace set. + * + * the memory pointed to by elem is considered ownership and responsibility of the dict now, so it shouldn't be freed by the caller. it can still be modified, however. + * + * if elem or benc is NULL, function does nothing. + * + * default (without replace), new element will be inserted into the dict, but before the old element, with the aim that finders, such as bpath(), would return the new element instead of the old. + * replace option frees the old element and inserts this one instead if the key already exists in the dict. this is not the default, because it frees objects that may be used elsewhere. + * + * @param benc [in] the structure to which elem will be inserted into + * @param elem [in] the element that will be inserted into the structure + */ + +void binsert (struct bencoding * benc, struct bencoding * elem) { + if (!benc || !elem) + return NULL; + elem->parent = benc; + if (!benc->child) { + elem->next = NULL; + elem->prev = NULL; + elem->parent = benc; + benc->child = elem; + return; + } + benc = benc->child; + while (benc->next && bcompare(elem->key, benc->next->key) < 0) + benc = benc->next; + struct bencoding * oldnext = benc->next; + benc->next = elem; + elem->prev = benc; + elem->next = oldnext; + if (oldnext) + oldnext->prev = elem; +} + +/** + * returns a bencoding element that represents a string + * + * the string ownership is not transfered, the string is strdup()ed + * + * @param str [in] the string to be converted to a bencoding element + */ + +struct bencoding * bstr (const char * str) { + struct bencoding * b = calloc(1, sizeof *b); + if (!b) + return NULL; + b->type = string; + b->valuelen = strlen(str); + b->value = strdup(str); + if (!b->value) { + free(b); + return NULL; + } + return b; +} + +/** + * returns a bencoding element that represents a number + * + * @param num [in] the number to be converted to a bencoding number + */ + +struct bencoding * bnum (long int num) { + struct bencoding * b = calloc(1, sizeof *b); + if (!b) + return NULL; + b->type = num; + char buf[512]; + sprintf(buf, "%ld", num); + b->value = strdup(buf); + if (!b->valueint) { + free(b); + return NULL; + } + b->valueint = num; + return b; +} + /** * helper macros for number comparisons */ @@ -140,8 +270,6 @@ int b2json_length (struct bencoding * b) { return 4; if (b->type & string) { int size = 2; - if (b->oldterminatorls) - size += b2json_charsize(b->oldterminatorls) - b2json_charsize('\0'); for (size_t i = 0; i < b->valuelen; i++) size += b2json_charsize(b->value[i]); return size; @@ -196,10 +324,7 @@ char * b2json (char * dest, struct bencoding * b) { if (b->type & string) { *dest++ = '"'; for (size_t i = 0; i < b->valuelen; i++) - if (i == b->valuelen-1 && b->oldterminatorls) - dest = b2json_charrepr(dest, b->oldterminatorls); - else - dest = b2json_charrepr(dest, b->value[i]); + dest = b2json_charrepr(dest, b->value[i]); *dest++ = '"'; return dest; } @@ -250,28 +375,11 @@ char * b2json (char * dest, struct bencoding * b) { #pragma GCC diagnostic pop } -/** - * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content. - * dereferences structure without checking. - * resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator. - * - * @param stru [in] bencoding structure of a bdecoded element - * @param char [out] char * type variable that will contain allocad string. DO NOT ATTEMPT TO FREE; automatic free at return! - */ - -#define bencoding_string(stru, char) \ - char = alloca(stru->valuelen+1); \ - snprintf(char, stru->valuelen+1, "%.*s", stru->valuelen, stru->value); \ - if (stru->oldterminatorls) \ - char[stru->valuelen-1] = char[stru->oldterminatorls]; \ - /** * bdecodes a bencoded structure from a string into a bencoding structure that must be free_bencodinged by the caller. * * nonstandard things: this parser allows for dict keys to be of any type, valuekey * - * by default input string is unmodified, unless terminate opt is set. - * * @param len [in] * if set to -1, string is assumed to be correct and not NULL terminated, NULLs may be in strings. * - malicious strings may trigger reads past the end of the buffer, which may lead to undefined * behaviour, crashes (DoS) or leaks of content, stored in memory. @@ -288,21 +396,27 @@ char * b2json (char * dest, struct bencoding * b) { * opts will be reflected in the ->type of the returning struct. opts will apply to childs of lists&dicts too. */ -struct bencoding * bdecode (char * s, int len, enum benc opts) { +struct bencoding * bdecode (const char * s, int len, enum benc opts) { if (!s || len < -2 || (len >= 0 && len < 2 /* 2 being the smallest bencoding string */)) return NULL; if (len == -2) len = strlen(s); struct bencoding * b = calloc(1, sizeof(struct bencoding)); /* SEGV if OOM */ - char * c = NULL; + char * ch = NULL; switch (s[0]) { case 'i': /* num */ b->type = num; - b->value = s+1; if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */ - b->intvalue = strtol(b->value, &c, 10); - b->valuelen = c-b->value; - } + b->intvalue = strtol(s+1, &ch, 10); + b->valuelen = ch-(s+1); + b->value = malloc(b->valuelen+1); + if (!b->value) + return NULL; + strncpy(b->value, s+1, b->valuelen); + b->value[b->valuelen] = '\0'; + b->after = s+2+b->valuelen; + } else + return NULL; break; case 'd': /* dict */ b->type = dict; @@ -310,19 +424,15 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { case 'l': /* list */ if (!b->type) b->type = list; - c = s+1; + const char * cp = s+1; struct bencoding * arbeit = NULL; struct bencoding * oldarbeit = NULL; struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */ int index = 0; - while (len == -1 || c <= s+len) { /* s+len is max we are allowed to read */ - if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator) - c[0] = oldarbeit->oldterminator; - arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts); + while (len == -1 || cp <= s+len) { /* s+len is max we are allowed to read */ + arbeit = bdecode(cp, len == -1 ? -1 : len-(cp-s), opts); if (arbeit) arbeit->parent = b; - if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator) - c[0] = '\0'; if (!arbeit) /* bdecoding failed or last element */ break; #define ISDICT (b->type & dict) @@ -331,12 +441,7 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { #define ISKEY !ISVAL if (ISDICT && ISVAL) arbeit->key = oldarbeit; - if (arbeit->type & num) - c = arbeit->value+arbeit->valuelen+1; - else if (arbeit->type & string) - c = arbeit->value+arbeit->valuelen; - else if (arbeit->type & (list | dict)) - c += arbeit->valuelen; + cp = arbeit->after; arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit; arbeit->index = ISDICT ? index/2 : index; if (ISLIST) { @@ -355,7 +460,7 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { oldarbeit = arbeit; index++; } - b->valuelen = c-s + 1; + b->after = cp+1; b->type = b->type | opts; if (ISDICT && ISVAL) // e je torej value, če je prej samoten key free_bencoding(oldarbeit); // this key would be otherwise leaked @@ -371,28 +476,25 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { } b->type = string; if (len == -1 || (b->value = memchr(s, ':', len))) { - b->valuelen = strtol(s, &c, 10); - b->value = c+1; - if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */) - b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */ + b->valuelen = strtol(s, &ch, 10); + if (len != -1 && (unsigned)len < b->valuelen + (ch+1 - s) /* len minus prefix; strlen & colon */) + b->valuelen = len - (ch+1 - s); /* malformed bencoded data, truncating string */ + b->value = malloc(b->valuelen+1); + strncpy(b->value, ch+1, b->valuelen); + b->value[b->valuelen] = '\0'; + b->after = ch+1+b->valuelen; + } else { + free(b); + return NULL; } break; } - if (opts & terminate) { - if (len != -1 && b->valuelen+1+(b->value-s) > (unsigned) len) { /* no space for terminator, put it on last char */ - b->oldterminatorls = b->value[b->valuelen-1]; - b->value[b->valuelen-1] = '\0'; - } else { - b->oldterminator = b->value[b->valuelen]; - b->value[b->valuelen] = '\0'; - } - } b->type = b->type | opts; return b; } /** - * returns a pointer to bencoding struct matching bencoding path or NULL if not found + * returns a pointer to bencoding struct matching bencoding path or NULL if not found. * * path key/key2/key3 will given object {"key":{"key2":{"key3":val}}} return val * @@ -415,8 +517,8 @@ struct bencoding * bpath (struct bencoding * benc, const char * key) { while (benc) { if (benc->key && benc->key->type & num) { char buf[512]; - sprintf(buf, "%ld", strtol(key, NULL, 10)); - if (!strncmp(buf, key, len) && benc->key->intvalue == strtol(key, NULL, 10)) { + sprintf(buf, "%ld", benc->key->intvalue); + if (len == strlen(buf) && !strncmp(buf, key, len)) { if (!c) return benc; else @@ -424,7 +526,7 @@ struct bencoding * bpath (struct bencoding * benc, const char * key) { } } if (benc->key && benc->key->type & string) { - if (!strncmp(key, benc->key->value, MIN(benc->key->valuelen, len))) { + if (len == benc->key->valuelen && !strncmp(key, benc->key->value, len)) { if (!c) return benc; else @@ -445,3 +547,30 @@ struct bencoding * bpath (struct bencoding * benc, const char * key) { #define bforeach(elem, list) \ for (struct bencoding * elem = list ? list->child : NULL; elem; elem = elem->next) + +/** + * find a value in a list. returns NULL if not found. + * + * @param benc [in] the bencoding list or dict to look in + * @param str [in] the value + */ + +struct bencoding * bval (struct bencoding * benc, const char * val) { + if (!benc) + return NULL; + if (!benc->child) + return NULL; + benc = benc->child; + while (benc) { + if (benc->type & num) { + char buf[412]; + sprintf(buf, "%ld", benc->intvalue); + if (!strcmp(buf, val)) + return benc; + } + if (benc->type & string && strlen(val) == benc->valuelen && !strncmp(benc->value, val, benc->valuelen)) + return benc; + benc = benc->next; + } + return NULL; +} diff --git a/src/dht.c b/src/dht.c index 629bf26..a8538b5 100644 --- a/src/dht.c +++ b/src/dht.c @@ -1,6 +1,7 @@ struct dht { char id[20]; int socket; + char secret[16]; /**< for calculating opaque write token, random */ }; struct node { char id[20]; @@ -12,3 +13,6 @@ struct node { time_t last; struct sockaddr addr; }; +void token (char * dest, struct sockaddr addr) {š + +} diff --git a/utils/bencoding.c b/utils/bencoding.c index ae80546..1e90737 100644 --- a/utils/bencoding.c +++ b/utils/bencoding.c @@ -8,7 +8,7 @@ #define S0(x) (x ? x : "") int main (int argc, char ** argv) { if (argc < 1+1) - error_at_line(1, 0, __FILE__, __LINE__, "%s encode < json || %s decode < bencoding || %s path path/to/obj < bencoding || %s foreach < bencoding", S0(argv[0]), S0(argv[0]), S0(argv[0]), S0(argv[0])); + error_at_line(1, 0, __FILE__, __LINE__, "%s encode < json || %s decode < bencoding || %s path path/to/obj < bencoding || %s foreach < bencoding || %s val value < bencoding (should output null or value as JSON string if found in array)", S0(argv[0]), S0(argv[0]), S0(argv[0]), S0(argv[0]), S0(argv[0])); if (argv[1][0] == 'p' && argc != 1+2) error_at_line(1, 0, __FILE__, __LINE__, "set path!"); int size = 2048; @@ -23,7 +23,7 @@ int main (int argc, char ** argv) { } if (argv[1][0] == 'e') error_at_line(3, 0, __FILE__, __LINE__, "N/I"); - struct bencoding * bencoding = bdecode(in, size, terminate); + struct bencoding * bencoding = bdecode(in, size, 0); if (argv[1][0] == 'd') { len = b2json_length(bencoding); char out[len+1]; @@ -44,6 +44,16 @@ int main (int argc, char ** argv) { error_at_line(4, 0, __FILE__, __LINE__, "b2json wrote %ld instead of %d bytes.", end-out, len); fprintf(stderr, "len: %d\n", len); } + if (argv[1][0] == 'v') { + len = b2json_length(bval(bencoding, argv[2])); + char out[len+1]; + char * end = b2json(out, bval(bencoding, argv[2])); + *end = '\0'; + puts(out); + if (end - out != len) + error_at_line(4, 0, __FILE__, __LINE__, "b2json wrote %ld instead of %d bytes.", end-out, len); + fprintf(stderr, "len: %d\n", len); + } if (argv[1][0] == 'f') { bforeach (value, bencoding) { len = b2json_length(value->key); -- cgit v1.2.3