summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsijanec <anton@sijanec.eu>2021-05-04 23:20:33 +0200
committersijanec <anton@sijanec.eu>2021-05-04 23:20:33 +0200
commit872a765eeebfeea314aae1f3a356f8ac280526aa (patch)
treef8acd64478f7be1f3bb5efb6ccd2e4d613f9c73c
downloadtravnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.gz
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.bz2
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.lz
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.xz
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.tar.zst
travnik-872a765eeebfeea314aae1f3a356f8ac280526aa.zip
-rw-r--r--.gitignore3
-rw-r--r--Makefile21
-rw-r--r--README.md34
-rw-r--r--src/bencoding.c197
-rw-r--r--src/main.c13
-rw-r--r--src/utils.c4
6 files changed, 272 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6df1427
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+travnik
+tmp/
+valgrind-out.txt
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..632a6c5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+DESTDIR=/
+
+default: # builds the travnik binary from src/main.c with debug info, after making sure tmp/ exists
+ mkdir tmp -p
+ gcc -Wno-dangling-else -Wall -Wextra -pedantic -g -Isrc -Itmp src/main.c -lm -otravnik
+
+install: # copies the already built travnik binary into $(DESTDIR)/usr/bin/
+ mkdir -p $(DESTDIR)/usr/bin/
+ cp travnik $(DESTDIR)/usr/bin/
+
+distclean: # removes the binary and tmp/, same recipe as clean
+ rm travnik tmp -rf
+
+clean: # removes the binary and tmp/
+ rm travnik tmp -rf
+
+prepare: # installs the build dependencies with apt, needs sudo
+ sudo apt install libmicrohttpd-dev build-essential default-libmysqlclient-dev -y
+
+valgrind: # runs ./travnik under valgrind and writes the report to valgrind-out.txt
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./travnik
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bfa6215
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# travnik - NOT IMPLEMENTED YET, COME BACK LATER!
+
+... is a tool that connects to the BitTorrent DHT network and waits for infohashes of torrents, fetches their metadata, stores it in a database and indexes them via a web interface. It's meant to be a lighter and simpler alternative to [btdig.com's erlang crawler](https://btdig.com).
+
+travnik operates single-threadedly, including the BEP-5 (DHT), BEP-9 (metadata exchange) and HTTP client.
+
+travnik implements BEP-3 (bencoding), BEP-5 (DHT) and BEP-9 (metadata exchange) itself, other things (mysql client and http server) are handled by libraries.
+
+## requirements
+
+`build-essential`, `libmicrohttpd-dev`, `default-libmysqlclient-dev`
+
+## installation
+
+users of debian and its derivatives can add my package repository [prog.sijanec.eu](https://prog.sijanec.eu) and run `apt install travnik`.
+
+other users compile from source with `make`.
+
+## usage
+
+debian users get a systemd service installed and can start `travnik` with `service travnik start`. logs are written to the default log (`journalctl -xe` or `service travnik status`). `travnik` is started as the user `nobody`.
+
+other users can execute `./travnik`; the command hangs and reports logs to `stderr`. it's important to start `travnik` as a user with the least amount of privileges, `nobody` for example.
+
+`travnik` then starts crawling and opens up a web interface on port 8728, open [http://localhost:8728](http://localhost:8728) for searching and exporting the database. the port number 8728 can be remembered by picturing the letters TRAV on a mobile phone keyboard.
+
+## prebuilt binaries
+
+... are available from my CI/CD server. check the badge below to see if the latest release was built successfully.
+
+[![Build Status](https://jenkins.sijanec.eu/job/sear.c/badge/icon)](https://jenkins.sijanec.eu/job/travnik/)
+
+* `amd64`: [https://cargova.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/)
+* `arm64`: [https://of.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/)
diff --git a/src/bencoding.c b/src/bencoding.c
new file mode 100644
index 0000000..8c32399
--- /dev/null
+++ b/src/bencoding.c
@@ -0,0 +1,197 @@
+/**
+ * enum of all possible bencoding types and some options to use.
+ * every enumerator is a separate bit, so type bits and option bits can be combined in a single value.
+ * to check a type, use bitwise AND (for example b->type & list), not direct comparison, as bdecoded structs
+ * inherit opts from the bdecode function in their ->type
+ */
+
+enum benc {
+ string = 1 << 0,
+ num = 1 << 1,
+ list = 1 << 2,
+ dict = 1 << 3,
+ terminate = 1 << 4 /**< option bit: bencoding strings are terminated and you do not need bencoding_string to use them. breaks input str.
+ note: when out of space, the terminator is placed instead of the last character of the string. **/
+};
+
+/**
+ * structure representation of bencoded data
+ * the structure does not copy any data, it's assumed that the origin string that was used to create the structure does not change
+ * structs are created zeroed (calloc) by bdecode, so every member that bdecode does not assign is NULL or 0.
+ */
+
+struct bencoding {
+ struct bencoding * next; /**< next sibling. NULL if element is not member of a list or dict, or if no later sibling was linked */
+ struct bencoding * prev; /**< element recorded by bdecode as the previous one in the same list or dict, NULL for the first */
+ struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */
+ struct bencoding * parent; /**< NOTE(review): bdecode in this file never assigns this member, so it stays NULL */
+ enum benc type; /**< type of this element, ORed with the opts that were passed to bdecode */
+ struct bencoding * key; /**< the key element, string according to the spec. bdecode sets it on dict values only, NULL otherwise */
+ char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */
+ size_t valuelen; /**< length of string value, as value is not null terminated */
+ int intvalue; /**< for num elements: result of strtol (base 10) on value, converted to int */
+ int index; /**< 0 based position in the parent list. in a dict a key and its value share the number of their pair */
+ char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */
+ char oldterminatorls; /**< when opts&terminate when there was no more space, replaced character is stored here.
+ if there'd be enough space, the next one of this one would be replaced.
+ this is used by bencoding string, as it will repair the original string and restore the last character. */
+};
+
+/**
+ * frees the passed bencoding struct and everything it owns: all following siblings (the ->next chain), the children
+ * (->child) and the dict key (->key) of every freed element. performs no action if NULL was passed.
+ * caller should NULL the pointer to prevent reuse.
+ *
+ * ->prev and ->parent are never followed, they are treated as back references that own nothing.
+ *
+ * siblings are walked in a loop and ->next is saved before the element is freed. recursing into ->next and then
+ * continuing from the same pointer would read a freed element and could free the rest of the chain twice.
+ *
+ * @param b [in] first element to free, may be NULL. the strings the elements point into are not freed.
+ */
+
+void free_bencoding (struct bencoding * b) {
+	while (b) {
+		struct bencoding * next = b->next; /* has to be read before b is freed */
+		free_bencoding(b->child); /* we free the children should they exist. it can be NULL. */
+		free_bencoding(b->key); /* should this be an element of a dict, free the key */
+		free(b); /* we free the element */
+		b = next; /* and continue with the sibling should it exist */
+	}
+}
+
+/**
+ * helper macros for number comparisons
+ */
+
+#define MAX(x, y) ((x) >= (y) ? (x) : (y))
+#define MIN(x, y) ((x) <= (y) ? (x) : (y))
+
+/**
+ * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content.
+ * dereferences structure without checking. both arguments are evaluated more than once, do not pass expressions with side effects.
+ * resulting C string is NULL terminated at index valuelen. if the value itself contains a NULL, the C string ends there.
+ *
+ * when the element was decoded with opts&terminate and the terminator had to be put on the last character of the value
+ * (oldterminatorls is set), the saved character is put back in the copy, so the copy holds the complete value.
+ *
+ * expands to a single statement (do ... while (0)), so it is safe in an unbraced if/else. write a semicolon after it.
+ *
+ * @param stru [in] pointer to the bencoding structure of a bdecoded element
+ * @param out [out] char * type variable that will contain allocad string. DO NOT ATTEMPT TO FREE; automatic free at return!
+ */
+
+#define bencoding_string(stru, out) \
+	do { \
+		(out) = alloca((stru)->valuelen+1); \
+		memcpy((out), (stru)->value, (stru)->valuelen); /* length is size_t, no int precision as with %.*s */ \
+		(out)[(stru)->valuelen] = '\0'; \
+		if ((stru)->oldterminatorls && (stru)->valuelen) /* restore the character the terminator overwrote */ \
+			(out)[(stru)->valuelen-1] = (stru)->oldterminatorls; \
+	} while (0)
+
+/**
+ * bdecodes a bencoded structure from a string into a bencoding structure that must be free_bencodinged by the caller.
+ * lists and dicts are decoded by calling bdecode again for every member, num, string and container are told apart by s[0].
+ *
+ * nonstandard things: this parser allows for dict keys to be of any type, valuekey
+ *
+ * by default input string is unmodified, unless terminate opt is set.
+ *
+ * @param s [in] pointer to the first character of the bencoded element. written to when opts&terminate.
+ * @param len [in] * if set to -1, string is assumed to be correct and not NULL terminated, NULLs may be in strings.
+ * - malicious strings may trigger reads past the end of the buffer, which may lead to undefined
+ * behaviour, crashes (DoS) or leaks of content, stored in memory.
+ * - if opts&terminate, another character will be written after the bencoded structure in memory if
+ * that structure is a string. beware and have space allocated for it!
+ * * if set to -2, string is assumed to be NULL terminated and no further reading will be done after the NULL.
+ * - if such terminator breaks an incomplete element, the resulting structure may be incomplete, but
+ * will be correct - for example valuelen of a misterminated string will correctly be shortened.
+ * * if set to a positive number, reading will only be allowed up to that many characters.
+ * - if the input string reaches the end and the structure is incomplete, same thing as with -2 happens.
+ * - if the structure ends cleanly (string length satisfied or end of list, dict or num found),
+ * processing stops, no matter how many characters of len are left.
+ * @param opts [in] sets options. do not set the type bits here, this is the same enum as the ->type enum of returned struct.
+ * opts will be reflected in the ->type of the returning struct. opts will apply to childs of lists&dicts too.
+ * @return calloced struct, or NULL if s is NULL, len is below -2, len is 0 or 1, s[0] is 'e' or s[0] is none of 'i', 'd', 'l'
+ * or a digit.
+ *
+ * NOTE(review), things visible in the code below that should be confirmed or fixed:
+ * - the result of calloc is used without a NULL check.
+ * - string case, len == -1: the || short-circuits, memchr does not run and b->value is still NULL when it is incremented.
+ * - string case, len != -1 and no ':' within len: b->value is NULL, the struct is returned anyway and the terminate code
+ * below indexes b->value.
+ * - list/dict case: the local oldterminator is initialised to '\0' and never assigned, so c[0] = oldterminator writes '\0'.
+ * presumably oldarbeit->oldterminator was meant.
+ * - dict case: b->child is set to oldarbeit when index == 1, which is the element decoded at index 0 (the first key),
+ * while the ->next links are made between values.
+ * - num and list/dict case: valuelen is (c-1)-b->value, one less than the distance from value to c, yet the member loop
+ * expects the closing 'e' at value+valuelen. looks like an off by one.
+ * - list/dict case, len == -1: c <= s+len compares against s-1.
+ * - terminate code: the branch commented "no space for terminator" is taken when characters remain after the value
+ * (valuelen+1+offset < len). the condition looks inverted.
+ */
+
+struct bencoding * bdecode (char * s, int len, enum benc opts) {
+ if (!s || len < -2 || (len >= 0 && len < 2 /* 2 being the smallest bencoding string */))
+ return NULL;
+ if (len == -2)
+ len = strlen(s);
+ struct bencoding * b = calloc(1, sizeof(struct bencoding)); /* SEGV if OOM */
+ char * c = NULL;
+ switch (s[0]) {
+ case 'i': /* num */
+ b->type = num;
+ b->value = s+1;
+ if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */
+ b->intvalue = strtol(b->value, &c, 10);
+ b->valuelen = (c-1)-b->value;
+ }
+ break;
+ case 'd': /* dict */
+ b->type = dict;
+ __attribute__((fallthrough));
+ case 'l': /* list */
+ if (!b->type)
+ b->type = list;
+ c = s;
+ struct bencoding * arbeit = NULL; /* member decoded in this iteration */
+ struct bencoding * oldarbeit = NULL; /* member decoded in the previous iteration */
+ struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */
+ int index = 0; /* counts decoded members. in a dict even ones are keys, odd ones are values */
+ b->value = s+1;
+ char oldterminator = '\0';
+ while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */
+ if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
+ c[0] = oldterminator;
+ arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts);
+ if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
+ c[0] = '\0';
+ if (!arbeit) /* bdecoding failed or last element */
+ break;
+#define ISDICT (b->type == dict)
+#define ISLIST !ISDICT
+#define ISVAL (index % 2 == 1)
+#define ISKEY !ISVAL
+ if (ISDICT && ISVAL)
+ arbeit->key = oldarbeit;
+ c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */
+ if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */
+ c++;
+ c--; /* while cond will inc again */
+ arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit;
+ arbeit->index = ISDICT ? index/2 : index;
+ if (ISLIST)
+ if (index)
+ oldarbeit->next = arbeit;
+ else
+ b->child = arbeit;
+ if (ISDICT)
+ if (index == 1)
+ b->child = oldarbeit;
+ else if (ISVAL)
+ oldoldarbeit->next = arbeit;
+ oldoldarbeit = oldarbeit;
+ oldarbeit = arbeit;
+ index++;
+ }
+ b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */
+ break;
+ case 'e': /* end of list/dict */
+ free(b);
+ return NULL;
+ default:
+ if (!(s[0] >= '0' && s[0] <= '9')) { /* not a string. not checking this would allow DoS for parsing "lx" */
+ free(b);
+ return NULL;
+ }
+ b->type = string;
+ if (len == -1 || (b->value = memchr(s, ':', len))) {
+ b->valuelen = strtol(s, NULL, 10); /* the decimal length prefix in front of the colon */
+ b->value++; /* step over the colon to the first character of the content */
+ if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */)
+ b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */
+ }
+ break;
+ }
+ if (opts & terminate) {
+ if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */
+ b->oldterminatorls = b->value[b->valuelen-1];
+ b->value[b->valuelen-1] = '\0';
+ } else {
+ b->oldterminator = b->value[b->valuelen];
+ b->value[b->valuelen] = '\0';
+ }
+ }
+ b->type = b->type | opts; /* the returned type carries the option bits too */
+ return b;
+}
+
+/**
+ * returns a pointer to bencoding struct matching bencoding path or NULL if not found
+ *
+ * [xxx] specifies xxxth child of a dict or list. if
+ */
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..74a9d1d
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,13 @@
+/* system libraries */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* my code */
+#include <utils.c>
+#include <bencoding.c>
+
+/**
+ * program entry point, currently a stub.
+ * allocates a 16 byte buffer marked free_char_after, so it is freed automatically when main returns.
+ *
+ * @return argc, or 69 if argv is NULL
+ */
+int main (int argc, char ** argv) {
+	char * scratch free_char_after = malloc(16);
+	if (!argv)
+		return 69;
+	return argc;
+}
diff --git a/src/utils.c b/src/utils.c
new file mode 100644
index 0000000..237f701
--- /dev/null
+++ b/src/utils.c
@@ -0,0 +1,4 @@
+/* put between the declarator and the initialiser of a char * variable to have free_char_pointer called on it at scope exit */
+#define free_char_after __attribute__ ((__cleanup__ (free_char_pointer)))
+
+/**
+ * cleanup handler for free_char_after: frees the buffer that the given variable points to.
+ *
+ * @param p [in] address of the char * variable going out of scope, *p may be NULL
+ */
+void free_char_pointer (char ** p) {
+	char * owned = *p;
+	free(owned);
+}