From 872a765eeebfeea314aae1f3a356f8ac280526aa Mon Sep 17 00:00:00 2001 From: sijanec Date: Tue, 4 May 2021 23:20:33 +0200 Subject: initial commit, UNTESTED bencoding parser --- .gitignore | 3 + Makefile | 21 ++++++ README.md | 34 ++++++++++ src/bencoding.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.c | 13 ++++ src/utils.c | 4 ++ 6 files changed, 272 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 src/bencoding.c create mode 100644 src/main.c create mode 100644 src/utils.c diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6df1427 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +travnik +tmp/ +valgrind-out.txt diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..632a6c5 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +DESTDIR=/ + +default: + mkdir tmp -p + gcc -Wno-dangling-else -Wall -Wextra -pedantic -g -Isrc -Itmp src/main.c -lm -otravnik + +install: + mkdir -p $(DESTDIR)/usr/bin/ + cp travnik $(DESTDIR)/usr/bin/ + +distclean: + rm travnik tmp -rf + +clean: + rm travnik tmp -rf + +prepare: + sudo apt install libmicrohttpd-dev build-essential default-libmysqlclient-dev -y + +valgrind: + valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./travnik diff --git a/README.md b/README.md new file mode 100644 index 0000000..bfa6215 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# travnik - NOT IMPLEMENTED YET, COME BACK LATER! + +... is a tool that connects to the bittorent dht network and waits for infohashes of torrents, fetches their metadata, stores it in a database and indexes them via a web-interface. It's meant to be a lighter and simpler alternative to [btdig.com's erlang crawler](https://btdig.com). + +travnik operates single-threadedly, including the BEP-5 (DHT), BEP-9 (metadata exchange) and HTTP client. 
+ +travnik implements BEP-3 (bencoding), BEP-5 (DHT) and BEP-9 (metadata exchange) itself; other things (mysql client and http server) are handled by libraries. + +## requirements + +`build-essential`, `libmicrohttpd-dev`, `default-libmysqlclient-dev` + +## installation + +users of debian and its derivatives add my package repository [prog.sijanec.eu](https://prog.sijanec.eu) and `apt install travnik`. + +other users compile from source with `make`. + +## usage + +debian users get a systemd service installed and can start `travnik` with `service travnik start`. logs are written to the default log (`journalctl -xe` or `service travnik status`). `travnik` is started as the user `nobody`. + +other users can execute `./travnik`; the command keeps running in the foreground and reports logs to `stderr`. it's important to start `travnik` as a user with the least amount of privileges, `nobody` for example. + +`travnik` then starts crawling and opens up a web interface on port 8728; open [http://localhost:8728](http://localhost:8728) for searching and exporting the database. 8728 can be remembered by picturing letters TRAV on a mobile phone keyboard. + +## prebuilt binaries + +... are available from my CI/CD server. check the badge below to see whether the latest release was built successfully. 
+ +[![Build Status](https://jenkins.sijanec.eu/job/sear.c/badge/icon)](https://jenkins.sijanec.eu/job/travnik/) + +* `amd64`: [https://cargova.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/) +* `arm64`: [https://of.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/) diff --git a/src/bencoding.c b/src/bencoding.c new file mode 100644 index 0000000..8c32399 --- /dev/null +++ b/src/bencoding.c @@ -0,0 +1,197 @@ +/** + * enum of all possible bencoding types and some options to use + * to check a type, use bitwise AND (for example type & string), not direct comparison, as bdecoded structs inherit opts from bdecode function in their ->type + */ + +enum benc { + string = 1 << 0, + num = 1 << 1, + list = 1 << 2, + dict = 1 << 3, + terminate = 1 << 4 /**< bencoding strings are terminated and you do not need bencoding_string to use them. breaks input str. + note: when out of space, the terminator is placed instead of the last character of the string. **/ +}; + +/** + * structure representation of bencoded data + * the structure does not copy any data, it's assumed that the origin string that was used to create the structure does not change + */ + +struct bencoding { + struct bencoding * next; /**< next sibling; NULL if element is not a member of a list or dict or if it is the last member */ + struct bencoding * prev; /**< previous element as linked by bdecode; NULL for the first one */ + struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */ + struct bencoding * parent; /**< NOTE(review): nothing in bencoding.c as added here assigns it, so it stays NULL after bdecode - confirm before relying on it */ + enum benc type; /**< type of this element, ORed with the opts that were passed to bdecode */ + struct bencoding * key; /**< the key element, string according to the spec; bdecode sets it only on the values of a dict */ + char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */ + size_t valuelen; /**< length of string value, as value is not null terminated */ + int intvalue; /**< for num elements, the result of strtol on the content */ + int index; /**< position among the siblings, starting at 0; for elements of a dict it counts key/value pairs */ + char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */ + char oldterminatorls; /**< when opts&terminate when there was no more space, replaced 
character is stored here. + if there'd be enough space, the next one of this one would be replaced. + this is used by bencoding string, as it will repair the original string and restore the last character. */ +}; + +/** + * frees the passed bencoding struct together with its following siblings (->next chain), its children and its key, + * or performs no action if NULL was passed. caller should NULL the pointer to prevent reuse. + * + * @param b [in] element to free, may be NULL + */ + +void free_bencoding (struct bencoding * b) { + while (b) { /* siblings are walked iteratively; recursing on ->next from a loop would free every sibling more than once */ + struct bencoding * next = b->next; /* has to be read before b is freed */ + free_bencoding(b->child); /* frees the first child and, through its ->next chain, all other children. it can be NULL. */ + free_bencoding(b->key); /* should this be a value of a dict, free the key */ + free(b); /* we free the element */ + b = next; + } +} + +/** + * helper macros for number comparisons + */ + +#define MAX(x, y) ((x) >= (y) ? (x) : (y)) +#define MIN(x, y) ((x) <= (y) ? (x) : (y)) + +/** + * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content. + * dereferences structure without checking. expands to several statements, so do not use it as the unbraced body of an if/for/while. + * resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator. + * + * @param stru [in] bencoding structure of a bdecoded element + * @param char [out] char * type variable that will contain allocad string. DO NOT ATTEMPT TO FREE; automatic free at return! + */ + +#define bencoding_string(stru, char) \ + char = alloca(stru->valuelen+1); \ + snprintf(char, stru->valuelen+1, "%.*s", (int) stru->valuelen, stru->value); /* the precision of %.*s must be an int */ \ + if (stru->oldterminatorls) { /* the terminator sits on the last character, so snprintf stopped one character early */ \ + char[stru->valuelen-1] = stru->oldterminatorls; /* put back the character that the terminator replaced */ \ + char[stru->valuelen] = '\0'; /* and terminate the copy again, this byte was not written by snprintf */ \ + } \ + +/** + * bdecodes a bencoded structure from a string into a bencoding structure that must be free_bencodinged by the caller. + * + * nonstandard things: this parser allows for dict keys to be of any type, valuekey + * + * by default input string is unmodified, unless terminate opt is set. 
+ * + * @param len [in] * if set to -1, string is assumed to be correct and not NUL terminated, NUL bytes may be in strings. + * - malicious strings may trigger reads past the end of the buffer, which may lead to undefined + * behaviour, crashes (DoS) or leaks of content stored in memory. + * - if opts&terminate, another character will be written after the bencoded structure in memory if + * that structure is a string. beware and have space allocated for it! + * * if set to -2, string is assumed to be NUL terminated and no further reading will be done after the NUL. + * - if such terminator breaks an incomplete element, the resulting structure may be incomplete, but + * will be correct - for example valuelen of a misterminated string will correctly be shortened. + * * if set to a positive number, reading will only be allowed up to that many characters. + * - if the input string reaches the end and the structure is incomplete, same thing as with -2 happens. + * - if the structure ends cleanly (string length satisfied or end of list, dict or num found), + * processing stops, no matter how many characters of len are left. + * @param opts [in] sets options. do not set the type bits here, this is the same enum as the ->type enum of returned struct. + * opts will be reflected in the ->type of the returned struct. opts will apply to children of lists&dicts too. 
+ */ + +struct bencoding * bdecode (char * s, int len, enum benc opts) { + if (!s || len < -2 || (len >= 0 && len < 2 /* 2 being the smallest bencoding string */)) + return NULL; + if (len == -2) + len = strlen(s); + struct bencoding * b = calloc(1, sizeof(struct bencoding)); + if (!b) /* out of memory: report failure like any other undecodable input instead of dereferencing NULL */ + return NULL; + char * c = NULL; + switch (s[0]) { + case 'i': /* num */ + b->type = num; + b->value = s+1; + if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */ + b->intvalue = strtol(b->value, &c, 10); + b->valuelen = (c-1)-b->value; /* NOTE(review): strtol leaves c on the first character after the digits, so this is one less than the number of digit characters - suspected off-by-one, confirm */ + } + break; + case 'd': /* dict */ + b->type = dict; + __attribute__((fallthrough)); + case 'l': /* list */ + if (!b->type) + b->type = list; + c = s; + struct bencoding * arbeit = NULL; + struct bencoding * oldarbeit = NULL; + struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */ + int index = 0; + b->value = s+1; + char oldterminator = '\0'; /* NOTE(review): never assigned after this, so c[0] = oldterminator below always writes '\0' - presumably oldarbeit->oldterminator was meant, confirm */ + while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read. NOTE(review): for len == -1 the || short-circuits and ++c never runs, so the first recursive call gets c == s again - looks like unbounded recursion, confirm */ + if (opts&terminate && oldarbeit && oldarbeit->oldterminator) + c[0] = oldterminator; + arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts); + if (opts&terminate && oldarbeit && oldarbeit->oldterminator) + c[0] = '\0'; + if (!arbeit) /* bdecoding failed or last element */ + break; +#define ISDICT (b->type == dict) +#define ISLIST !ISDICT +#define ISVAL (index % 2 == 1) +#define ISKEY !ISVAL + if (ISDICT && ISVAL) + arbeit->key = oldarbeit; + c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */ + if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */ + c++; + c--; /* while cond will inc again */ + arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit; + arbeit->index = ISDICT ? 
index/2 : index; + if (ISLIST) + if (index) + oldarbeit->next = arbeit; + else + b->child = arbeit; + if (ISDICT) + if (index == 1) + b->child = arbeit; /* the first value heads the child chain: values are linked through ->next and each key hangs off its value's ->key, so storing the key here would leave every value unreachable */ + else if (ISVAL) + oldoldarbeit->next = arbeit; + oldoldarbeit = oldarbeit; + oldarbeit = arbeit; + index++; + } + b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l. NOTE(review): when the loop stopped on the closing 'e' this is one less than the content length - suspected off-by-one, confirm */ + break; + case 'e': /* end of list/dict */ + free(b); + return NULL; + default: + if (!(s[0] >= '0' && s[0] <= '9')) { /* not a string. not checking this would allow DoS for parsing "lx" */ + free(b); + return NULL; + } + b->type = string; + b->value = len == -1 ? strchr(s, ':') : memchr(s, ':', len); /* the delimiter is searched for in both modes, so ->value is always assigned before it is used */ + if (!b->value) { /* no ':' found, so this is not a complete string. a NULL ->value must not be kept: the terminate code below would write through it */ + free(b); + return NULL; + } + b->valuelen = strtol(s, NULL, 10); + b->value++; /* skip the ':' itself */ + if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */) + b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */ + break; + } + if (opts & terminate) { + if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char. NOTE(review): this condition is true when one more byte still fits inside len, which contradicts the comment - suspected inverted comparison, confirm */ + b->oldterminatorls = b->value[b->valuelen-1]; + b->value[b->valuelen-1] = '\0'; + } else { + b->oldterminator = b->value[b->valuelen]; + b->value[b->valuelen] = '\0'; + } + } + b->type = b->type | opts; + return b; +} + +/** + * returns a pointer to bencoding struct matching bencoding path or NULL if not found + * + * [xxx] specifies xxxth child of a dict or list. if + */ diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..74a9d1d --- /dev/null +++ b/src/main.c @@ -0,0 +1,13 @@ +/* system libraries */ +#include +#include +#include + +/* my code */ +#include +#include + +int main (int argc, char ** argv) { + char * val free_char_after = malloc(16); + return argv ? 
argc : 69; +} diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 0000000..237f701 --- /dev/null +++ b/src/utils.c @@ -0,0 +1,4 @@ +#define free_char_after __attribute__ ((__cleanup__ (free_char_pointer))) /* put after the name of a char * variable to attach free_char_pointer to it as its cleanup handler */ +void free_char_pointer (char ** p) { /* cleanup handler; presumably p is the address of the annotated variable, as the compiler's cleanup attribute defines it - confirm against the compiler documentation */ + free(*p); /* releases the buffer that the variable points to */ +} -- cgit v1.2.3