From 8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Luka=20=C5=A0ijanec?= Date: Mon, 21 Nov 2022 20:11:12 +0100 Subject: created utils/bencoding.c, fixed bencoding (for now decode only). TODO: fuzz --- .gitignore | 7 +- Makefile | 21 ----- README.md | 19 ---- makefile | 23 +++++ src/bencoding.c | 259 +++++++++++++++++++++++++++++++++++++++++++++++------- src/utils.c | 4 - utils/bencoding.c | 36 ++++++++ 7 files changed, 293 insertions(+), 76 deletions(-) delete mode 100644 Makefile create mode 100644 makefile delete mode 100644 src/utils.c create mode 100644 utils/bencoding.c diff --git a/.gitignore b/.gitignore index 6df1427..9433c58 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ travnik tmp/ -valgrind-out.txt +valgrind-out.* +test.txt +test-eval +a.out +core +.gdb_history diff --git a/Makefile b/Makefile deleted file mode 100644 index 632a6c5..0000000 --- a/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -DESTDIR=/ - -default: - mkdir tmp -p - gcc -Wno-dangling-else -Wall -Wextra -pedantic -g -Isrc -Itmp src/main.c -lm -otravnik - -install: - mkdir -p $(DESTDIR)/usr/bin/ - cp travnik $(DESTDIR)/usr/bin/ - -distclean: - rm travnik tmp -rf - -clean: - rm travnik tmp -rf - -prepare: - sudo apt install libmicrohttpd-dev build-essential default-libmysqlclient-dev -y - -valgrind: - valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./travnik diff --git a/README.md b/README.md index bfa6215..db218ef 100644 --- a/README.md +++ b/README.md @@ -12,23 +12,4 @@ travnik implements BEP-3 (bencoding), BEP-5 (DHT) and BEP-9 (metadata exchange) ## installation -debian and derivatives users add my package repository (prog.sijanec.eu)[https://prog.sijanec.eu] and `apt install travnik`. - other users compile from source with `make`. - -## usage - -debian users get a systemd service installed and can start `travnik` with `service travnik start`. logs are written to the default log (`journalctl -xe` or `service travnik status`). `travnik` is started as the user `nobody`. - -other users can execute `./travnik`; the command hangs and reports logs to `stderr`. it's important to start `travnik` as a user with the least amount of priviledges, `nobody` for example. - -`travnik` then starts crawling and opens up a web interface on port 8728, open [http://localhost:8728](http://localhost:8728) for searching and exporting the database. 8782 can be remembered by picturing letters TRAV on a mobile phone keyboard. - -## prebuilt binaries - -... are available for my CI/CD server. check the badge below if the latest release was built successfully. - -[![Build Status](https://jenkins.sijanec.eu/job/sear.c/badge/icon)](https://jenkins.sijanec.eu/job/travnik/) - -* `amd64`: [https://cargova.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/) -* `arm64`: [https://of.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/) diff --git a/makefile b/makefile new file mode 100644 index 0000000..5de5868 --- /dev/null +++ b/makefile @@ -0,0 +1,23 @@ +DESTDIR=/ +CC=cc + +default: + mkdir -p tmp + $(CC) -Wall -Wextra -Wformat -pedantic -g -Isrc -Itmp $(CFLAGS) src/main.c -otravnik -lm $(LDFLAGS) + +install: + mkdir -p $(DESTDIR)/usr/bin/ + cp travnik $(DESTDIR)/usr/bin/ + +distclean: clean + +clean: + rm -rf travnik tmp test-eval + +prepare: + sudo apt install libmicrohttpd-dev build-essential default-libmysqlclient-dev -y + +valgrind: + valgrind --error-exitcode=59 --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt $(CMD) + +.PHONY: default install distclean clean prepare valgrind diff --git a/src/bencoding.c b/src/bencoding.c index 8c32399..d1324c7 100644 --- a/src/bencoding.c +++ b/src/bencoding.c @@ -21,11 +21,10 @@ struct bencoding { struct bencoding * next; /**< NULL if element is not member of a list or dict */ struct bencoding * prev; struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */ - struct bencoding * parent; - enum benc type; /**< type of this element */ - struct bencoding * key; /**< the key element, string according to the spec, applicable for list and dict */ - char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */ - size_t valuelen; /**< length of string value, as value is not null terminated */ + enum benc type; /**< type | opts of this element */ + struct bencoding * key; /**< the key element, string according to the spec, applicable for dict */ + char * value; /**< set to the content of the element, value is not null terminated unless terminate opt is set. NULL for dict and list. */ + size_t valuelen; /**< length of string value, as value is not null terminated, internal value for list or dict. */ int intvalue; int index; char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */ @@ -41,12 +40,10 @@ struct bencoding { void free_bencoding (struct bencoding * b) { if (!b) return; - struct bencoding * s = b; - while (s) /* we free all siblings should they exist */ - free_bencoding(s = s->next); free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */ free_bencoding(b->key); /* should this be an element of a dict, free the key */ - free(b); /* we free the element */ + free_bencoding(b->next); + free(b); return; } @@ -57,6 +54,201 @@ void free_bencoding (struct bencoding * b) { #define MAX(x, y) ((x) >= (y) ? (x) : (y)) #define MIN(x, y) ((x) <= (y) ? (x) : (y)) +/** + * return how much space a character in a string uses + * + * @param a [in] the character in question + */ + +int b2json_charsize (char a) { + if (a == '"') + return 2; + if (a == '\\') + return 2; + if (a == '\b') + return 2; + if (a == '\f') + return 2; + if (a == '\n') + return 2; + if (a == '\r') + return 2; + if (a == '\t') + return 2; + if (a < ' ') + return 6; + return 1; +} + +/** + * write a string representation of a character in a JSON string + * + * @param dest [out] destination + * @param a [in] the character in question + * @return the destination pointer, incremented for the number of bytes written + */ + +char * b2json_charrepr (char * dest, char a) { + switch (a) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" + case '"': + strncpy(dest, "\\\"", 2); + return dest+2; + case '\\': + strncpy(dest, "\\\\", 2); + return dest+2; + case '\b': + strncpy(dest, "\\b", 2); + return dest+2; + case '\f': + strncpy(dest, "\\f", 2); + return dest+2; + case '\n': + strncpy(dest, "\\n", 2); + return dest+2; + case '\r': + strncpy(dest, "\\r", 2); + return dest+2; + case '\t': + strncpy(dest, "\\t", 2); + return dest+2; + default: + if (a < ' ') { + char buf[7]; + sprintf(buf, "\\u00%02x", a); + strncpy(dest, buf, 6); + return dest+6; + } else { + *dest++ = a; + return dest; + } +#pragma GCC diagnostic pop + } +} + + +/** + * get size required for JSON representation of a bencoding struct. terminating NULL byte is not counted, because b2json does not write it. write it yourself. + * + * @param b [in] bencoding structure of a bdecoded element + */ + +int b2json_length (struct bencoding * b) { + if (!b) + return 4; + if (b->type & string) { + int size = 2; + if (b->oldterminatorls) + size += b2json_charsize(b->oldterminatorls) - b2json_charsize('\0'); + for (size_t i = 0; i < b->valuelen; i++) + size += b2json_charsize(b->value[i]); + return size; + } + if (b->type & num) { + char buf[512]; + sprintf(buf, "%d", b->intvalue); + return strlen(buf); + } + if (b->type & list) { + if (!b->child) + return 2; + struct bencoding * t = b->child; + int size = 2 + b2json_length(t); + while (t->next) { + t = t->next; + size += b2json_length(t) + 1; + } + return size; + } + if (b->type & dict) { + if (!b->child) + return 2; + struct bencoding * t = b->child; + int size = 3 + b2json_length(t) + b2json_length(t->key); + while (t->next) { + t = t->next; + size += 1 + b2json_length(t) + 1 + b2json_length(t->key); + } + return size; + } + return 5; +} + +/** + * write json representation of a bencoding struct. does not write terminating nullbyte, b2json_length does not include it in count. add it yourself. should write exactly b2json_length bytes. + * + * writes false when struct has an incorrect type and null when NULL pointer is passed, this is in ordnung with b2json_length. + * + * @param dest [in] destination + * @param b [in] bencoding structure of a bdecoded element + * @return the destination pointer, incremented for the number of bytes written + */ + +char * b2json (char * dest, struct bencoding * b) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" + if (!b) { + strncpy(dest, "null", 4); + return dest+4; + } + if (b->type & string) { + *dest++ = '"'; + for (size_t i = 0; i < b->valuelen; i++) + if (i == b->valuelen-1 && b->oldterminatorls) + dest = b2json_charrepr(dest, b->oldterminatorls); + else + dest = b2json_charrepr(dest, b->value[i]); + *dest++ = '"'; + return dest; + } + if (b->type & num) { + char buf[512]; + sprintf(buf, "%d", b->intvalue); + strncpy(dest, buf, strlen(buf)); + return dest+strlen(buf); + } + if (b->type & list) { + if (!b->child) { + strncpy(dest, "[]", 2); + return dest+2; + } + struct bencoding * t = b->child; + *dest++ = '['; + dest = b2json(dest, t); + while (t->next) { + t = t->next; + *dest++ = ','; + dest = b2json(dest, t); + } + *dest++ = ']'; + return dest; + } + if (b->type & dict) { + if (!b->child) { + strncpy(dest, "{}", 2); + return dest+2; + } + *dest++ = '{'; + struct bencoding * t = b->child; + dest = b2json(dest, t->key); + *dest++ = ':'; + dest = b2json(dest, t); + while (t->next) { + t = t->next; + *dest++ = ','; + dest = b2json(dest, t->key); + *dest++ = ':'; + dest = b2json(dest, t); + } + *dest++ = '}'; + return dest; + } + strncpy(dest, "false", 4); + return dest+4; +#pragma GCC diagnostic pop +} + /** * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content. * dereferences structure without checking. @@ -108,7 +300,7 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { b->value = s+1; if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */ b->intvalue = strtol(b->value, &c, 10); - b->valuelen = (c-1)-b->value; + b->valuelen = c-b->value; } break; case 'd': /* dict */ @@ -117,49 +309,54 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { case 'l': /* list */ if (!b->type) b->type = list; - c = s; + c = s+1; struct bencoding * arbeit = NULL; struct bencoding * oldarbeit = NULL; struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */ int index = 0; - b->value = s+1; - char oldterminator = '\0'; - while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */ - if (opts&terminate && oldarbeit && oldarbeit->oldterminator) - c[0] = oldterminator; + while (len == -1 || c <= s+len) { /* s+len is max we are allowed to read */ + if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator) + c[0] = oldarbeit->oldterminator; arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts); - if (opts&terminate && oldarbeit && oldarbeit->oldterminator) + if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator) c[0] = '\0'; if (!arbeit) /* bdecoding failed or last element */ break; -#define ISDICT (b->type == dict) +#define ISDICT (b->type & dict) #define ISLIST !ISDICT -#define ISVAL (index % 2 == 1) +#define ISVAL (index % 2) #define ISKEY !ISVAL if (ISDICT && ISVAL) arbeit->key = oldarbeit; - c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */ - if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */ - c++; - c--; /* while cond will inc again */ + if (arbeit->type & num) + c = arbeit->value+arbeit->valuelen+1; + else if (arbeit->type & string) + c = arbeit->value+arbeit->valuelen; + else if (arbeit->type & (list | dict)) + c += arbeit->valuelen; arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit; arbeit->index = ISDICT ? index/2 : index; - if (ISLIST) + if (ISLIST) { if (index) oldarbeit->next = arbeit; else b->child = arbeit; - if (ISDICT) + } + if (ISDICT) { if (index == 1) - b->child = oldarbeit; + b->child = arbeit; else if (ISVAL) oldoldarbeit->next = arbeit; + } oldoldarbeit = oldarbeit; oldarbeit = arbeit; index++; } - b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */ - break; + b->valuelen = c-s + 1; + b->type = b->type | opts; + if (ISDICT && ISVAL) // e je torej value, če je prej samoten key + free_bencoding(oldarbeit); // this key would be otherwise leaked + return b; case 'e': /* end of list/dict */ free(b); return NULL; @@ -170,15 +367,15 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) { } b->type = string; if (len == -1 || (b->value = memchr(s, ':', len))) { - b->valuelen = strtol(s, NULL, 10); - b->value++; + b->valuelen = strtol(s, &c, 10); + b->value = c+1; if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */) b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */ } break; } if (opts & terminate) { - if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */ + if (len != -1 && b->valuelen+1+(b->value-s) > (unsigned) len) { /* no space for terminator, put it on last char */ b->oldterminatorls = b->value[b->valuelen-1]; b->value[b->valuelen-1] = '\0'; } else { diff --git a/src/utils.c b/src/utils.c deleted file mode 100644 index 237f701..0000000 --- a/src/utils.c +++ /dev/null @@ -1,4 +0,0 @@ -#define free_char_after __attribute__ ((__cleanup__ (free_char_pointer))) -void free_char_pointer (char ** p) { - free(*p); -} diff --git a/utils/bencoding.c b/utils/bencoding.c new file mode 100644 index 0000000..a60e1b3 --- /dev/null +++ b/utils/bencoding.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include +#include +#define S0(x) (x ? x : "") +int main (int argc, char ** argv) { + if (argc != 1+1) + error_at_line(1, 0, __FILE__, __LINE__, "%s encode < json || %s decode < bencoding", S0(argv[0]), S0(argv[0])); + int size = 2048; + int len = 0; + char * in = malloc(size); + while (!feof(stdin) && !ferror(stdin)) { + if (!in) + error_at_line(2, 0, __FILE__, __LINE__, "heap alloc failed"); + len += fread(in, 1, size-len-1, stdin); + if ((size - len) < 1024) + in = realloc(in, size *= 2); + } + if (argv[1][0] == 'e') + error_at_line(3, 0, __FILE__, __LINE__, "N/I"); + struct bencoding * bencoding = bdecode(in, size, terminate); + len = b2json_length(bencoding); + char out[len+1]; + char * end = b2json(out, bencoding); + *end = '\0'; + puts(out); + if (end - out != len) + error_at_line(4, 0, __FILE__, __LINE__, "b2json wrote %ld instead of %d bytes.", end-out, len); + fprintf(stderr, "len: %d\n", len); + free_bencoding(bencoding); + free(in); + return 0; +} -- cgit v1.2.3