summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Luka Šijanec <anton@sijanec.eu>2022-11-21 20:11:12 +0100
committerAnton Luka Šijanec <anton@sijanec.eu>2022-11-21 20:11:12 +0100
commit8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb (patch)
tree27508c3ffa05f5934bd7af60c34736d89e0e5954
parentinitial commit, UNTESTED bencoding parser (diff)
downloadtravnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.gz
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.bz2
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.lz
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.xz
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.zst
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.zip
-rw-r--r--.gitignore7
-rw-r--r--Makefile21
-rw-r--r--README.md19
-rw-r--r--makefile23
-rw-r--r--src/bencoding.c259
-rw-r--r--src/utils.c4
-rw-r--r--utils/bencoding.c36
7 files changed, 293 insertions, 76 deletions
diff --git a/.gitignore b/.gitignore
index 6df1427..9433c58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
travnik
tmp/
-valgrind-out.txt
+valgrind-out.*
+test.txt
+test-eval
+a.out
+core
+.gdb_history
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 632a6c5..0000000
--- a/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-DESTDIR=/
-
-default:
- mkdir tmp -p
- gcc -Wno-dangling-else -Wall -Wextra -pedantic -g -Isrc -Itmp src/main.c -lm -otravnik
-
-install:
- mkdir -p $(DESTDIR)/usr/bin/
- cp travnik $(DESTDIR)/usr/bin/
-
-distclean:
- rm travnik tmp -rf
-
-clean:
- rm travnik tmp -rf
-
-prepare:
- sudo apt install libmicrohttpd-dev build-essential default-libmysqlclient-dev -y
-
-valgrind:
- valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./travnik
diff --git a/README.md b/README.md
index bfa6215..db218ef 100644
--- a/README.md
+++ b/README.md
@@ -12,23 +12,4 @@ travnik implements BEP-3 (bencoding), BEP-5 (DHT) and BEP-9 (metadata exchange)
## installation
-debian and derivatives users add my package repository (prog.sijanec.eu)[https://prog.sijanec.eu] and `apt install travnik`.
-
other users compile from source with `make`.
-
-## usage
-
-debian users get a systemd service installed and can start `travnik` with `service travnik start`. logs are written to the default log (`journalctl -xe` or `service travnik status`). `travnik` is started as the user `nobody`.
-
-other users can execute `./travnik`; the command hangs and reports logs to `stderr`. it's important to start `travnik` as a user with the least amount of priviledges, `nobody` for example.
-
-`travnik` then starts crawling and opens up a web interface on port 8728, open [http://localhost:8728](http://localhost:8728) for searching and exporting the database. 8782 can be remembered by picturing letters TRAV on a mobile phone keyboard.
-
-## prebuilt binaries
-
-... are available for my CI/CD server. check the badge below if the latest release was built successfully.
-
-[![Build Status](https://jenkins.sijanec.eu/job/sear.c/badge/icon)](https://jenkins.sijanec.eu/job/travnik/)
-
-* `amd64`: [https://cargova.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/)
-* `arm64`: [https://of.sijanec.eu/prog/travnik/](https://of.sijanec.eu/prog/travnik/)
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..5de5868
--- /dev/null
+++ b/makefile
@@ -0,0 +1,23 @@
+DESTDIR=/
+CC=cc
+
+default:
+ mkdir -p tmp
+ $(CC) -Wall -Wextra -Wformat -pedantic -g -Isrc -Itmp $(CFLAGS) src/main.c -otravnik -lm $(LDFLAGS)
+
+install:
+ mkdir -p $(DESTDIR)/usr/bin/
+ cp travnik $(DESTDIR)/usr/bin/
+
+distclean: clean
+
+clean:
+ rm -rf travnik tmp test-eval
+
+prepare:
+ sudo apt install libmicrohttpd-dev build-essential default-libmysqlclient-dev -y
+
+valgrind:
+ valgrind --error-exitcode=59 --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt $(CMD)
+
+.PHONY: default install distclean clean prepare valgrind
diff --git a/src/bencoding.c b/src/bencoding.c
index 8c32399..d1324c7 100644
--- a/src/bencoding.c
+++ b/src/bencoding.c
@@ -21,11 +21,10 @@ struct bencoding {
struct bencoding * next; /**< NULL if element is not member of a list or dict */
struct bencoding * prev;
struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */
- struct bencoding * parent;
- enum benc type; /**< type of this element */
- struct bencoding * key; /**< the key element, string according to the spec, applicable for list and dict */
- char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */
- size_t valuelen; /**< length of string value, as value is not null terminated */
+ enum benc type; /**< type | opts of this element */
+ struct bencoding * key; /**< the key element, string according to the spec, applicable for dict */
+ char * value; /**< set to the content of the element, value is not null terminated unless terminate opt is set. NULL for dict and list. */
+ size_t valuelen; /**< length of string value, as value is not null terminated, internal value for list or dict. */
int intvalue;
int index;
char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */
@@ -41,12 +40,10 @@ struct bencoding {
void free_bencoding (struct bencoding * b) {
if (!b)
return;
- struct bencoding * s = b;
- while (s) /* we free all siblings should they exist */
- free_bencoding(s = s->next);
free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */
free_bencoding(b->key); /* should this be an element of a dict, free the key */
- free(b); /* we free the element */
+ free_bencoding(b->next);
+ free(b);
return;
}
@@ -58,6 +55,201 @@ void free_bencoding (struct bencoding * b) {
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
/**
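+ * return the number of bytes a character occupies inside a JSON string once it is escaped
+ *
+ * @param a [in] the character in question
+ * @return 1 for a character written verbatim, 2 for a two-character escape, 6 for a \u00XX escape
+ */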
+
+int b2json_charsize (char a) {
+	/* classify the byte as unsigned: where char is signed, bytes >= 0x80 are negative and would otherwise be mistaken for control characters */
+	unsigned char byte = (unsigned char) a;
+	switch (byte) {
+		case '"':
+		case '\\':
+		case '\b':
+		case '\f':
+		case '\n':
+		case '\r':
+		case '\t':
+			return 2; /* backslash followed by one character */
+		default:
+			return byte < ' ' ? 6 : 1; /* \u00XX for the remaining control characters, verbatim otherwise */
+	}
+}
+
+/**
+ * write the representation of a character inside a JSON string. writes exactly b2json_charsize(a) bytes and no terminating NUL byte.
+ *
+ * bytes >= 0x80 are copied verbatim, control characters without a short escape are written as \u00XX with lowercase hex digits.
+ *
+ * @param dest [out] destination, must have room for b2json_charsize(a) bytes
+ * @param a [in] the character in question
+ * @return the destination pointer, incremented by the number of bytes written
+ */
+
+char * b2json_charrepr (char * dest, char a) {
+	static const char hexdigits[] = "0123456789abcdef";
+	unsigned char byte = (unsigned char) a; /* same classification as b2json_charsize */
+	char shorthand = '\0'; /* second character of a two-character escape, if any */
+	switch (byte) {
+		case '"':
+			shorthand = '"';
+			break;
+		case '\\':
+			shorthand = '\\';
+			break;
+		case '\b':
+			shorthand = 'b';
+			break;
+		case '\f':
+			shorthand = 'f';
+			break;
+		case '\n':
+			shorthand = 'n';
+			break;
+		case '\r':
+			shorthand = 'r';
+			break;
+		case '\t':
+			shorthand = 't';
+			break;
+		default:
+			break;
+	}
+	if (shorthand) {
+		*dest++ = '\\';
+		*dest++ = shorthand;
+		return dest;
+	}
+	if (byte < ' ') {
+		/* digits are produced by hand so nothing longer than six bytes can ever be written */
+		memcpy(dest, "\\u00", 4);
+		dest[4] = hexdigits[byte >> 4];
+		dest[5] = hexdigits[byte & 0xf];
+		return dest+6;
+	}
+	*dest++ = a;
+	return dest;
+}
+
+
+/**
+ * get the number of bytes b2json writes for a bencoding struct. the terminating NUL byte is not counted, because b2json does not write it; reserve and write it yourself.
+ *
+ * a NULL pointer counts as 4 (the length of null), an element of unrecognised type counts as 5 (the length of false).
+ *
+ * @param b [in] bencoding structure of a bdecoded element, may be NULL
+ * @return number of bytes required for the JSON representation
+ */
+
+int b2json_length (struct bencoding * b) {
+	if (!b)
+		return 4;
+	if (b->type & string) {
+		int size = 2; /* opening and closing quote */
+		for (size_t i = 0; i < b->valuelen; i++) {
+			/* apply the substitution b2json applies: when oldterminatorls is set, the last byte was overwritten and oldterminatorls holds the real character. doing it per byte keeps an empty string at exactly 2 */
+			if (i == b->valuelen-1 && b->oldterminatorls)
+				size += b2json_charsize(b->oldterminatorls);
+			else
+				size += b2json_charsize(b->value[i]);
+		}
+		return size;
+	}
+	if (b->type & num)
+		return snprintf(NULL, 0, "%d", b->intvalue); /* snprintf with a zero size only reports the length */
+	if (b->type & list) {
+		int size = 2; /* brackets */
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				size++; /* comma before every element but the first */
+			size += b2json_length(t);
+		}
+		return size;
+	}
+	if (b->type & dict) {
+		int size = 2; /* braces */
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				size++; /* comma before every pair but the first */
+			size += b2json_length(t->key) + 1 + b2json_length(t); /* key, colon, value */
+		}
+		return size;
+	}
+	return 5;
+}
+
+/**
+ * write the JSON representation of a bencoding struct. does not write a terminating NUL byte and b2json_length does not count it; add it yourself. should write exactly b2json_length(b) bytes.
+ *
+ * writes false when the struct has an unrecognised type and null when a NULL pointer is passed, in agreement with b2json_length.
+ *
+ * @param dest [out] destination, must have room for b2json_length(b) bytes
+ * @param b [in] bencoding structure of a bdecoded element, may be NULL
+ * @return the destination pointer, incremented by the number of bytes written
+ */
+
+char * b2json (char * dest, struct bencoding * b) {
+	if (!b) {
+		memcpy(dest, "null", 4);
+		return dest+4;
+	}
+	if (b->type & string) {
+		*dest++ = '"';
+		for (size_t i = 0; i < b->valuelen; i++) {
+			/* when oldterminatorls is set, the last byte of value was overwritten and oldterminatorls holds the real character */
+			if (i == b->valuelen-1 && b->oldterminatorls)
+				dest = b2json_charrepr(dest, b->oldterminatorls);
+			else
+				dest = b2json_charrepr(dest, b->value[i]);
+		}
+		*dest++ = '"';
+		return dest;
+	}
+	if (b->type & num) {
+		/* format into a scratch buffer first so that the NUL byte snprintf appends never lands in dest */
+		char buf[32];
+		int digits = snprintf(buf, sizeof buf, "%d", b->intvalue);
+		memcpy(dest, buf, (size_t) digits);
+		return dest+digits;
+	}
+	if (b->type & list) {
+		*dest++ = '[';
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				*dest++ = ',';
+			dest = b2json(dest, t);
+		}
+		*dest++ = ']';
+		return dest;
+	}
+	if (b->type & dict) {
+		*dest++ = '{';
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				*dest++ = ',';
+			dest = b2json(dest, t->key);
+			*dest++ = ':';
+			dest = b2json(dest, t);
+		}
+		*dest++ = '}';
+		return dest;
+	}
+	/* all five bytes: b2json_length reports 5 for an unrecognised type */
+	memcpy(dest, "false", 5);
+	return dest+5;
+}
+
+/**
* macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content.
* dereferences structure without checking.
* resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator.
@@ -108,7 +300,7 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) {
b->value = s+1;
if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */
b->intvalue = strtol(b->value, &c, 10);
- b->valuelen = (c-1)-b->value;
+ b->valuelen = c-b->value;
}
break;
case 'd': /* dict */
@@ -117,49 +309,54 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) {
case 'l': /* list */
if (!b->type)
b->type = list;
- c = s;
+ c = s+1;
struct bencoding * arbeit = NULL;
struct bencoding * oldarbeit = NULL;
struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */
int index = 0;
- b->value = s+1;
- char oldterminator = '\0';
- while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */
- if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
- c[0] = oldterminator;
+ while (len == -1 || c <= s+len) { /* s+len is max we are allowed to read */
+ if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator)
+ c[0] = oldarbeit->oldterminator;
arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts);
- if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
+ if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator)
c[0] = '\0';
if (!arbeit) /* bdecoding failed or last element */
break;
-#define ISDICT (b->type == dict)
+#define ISDICT (b->type & dict)
#define ISLIST !ISDICT
-#define ISVAL (index % 2 == 1)
+#define ISVAL (index % 2)
#define ISKEY !ISVAL
if (ISDICT && ISVAL)
arbeit->key = oldarbeit;
- c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */
- if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */
- c++;
- c--; /* while cond will inc again */
+ if (arbeit->type & num)
+ c = arbeit->value+arbeit->valuelen+1;
+ else if (arbeit->type & string)
+ c = arbeit->value+arbeit->valuelen;
+ else if (arbeit->type & (list | dict))
+ c += arbeit->valuelen;
arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit;
arbeit->index = ISDICT ? index/2 : index;
- if (ISLIST)
+ if (ISLIST) {
if (index)
oldarbeit->next = arbeit;
else
b->child = arbeit;
- if (ISDICT)
+ }
+ if (ISDICT) {
if (index == 1)
- b->child = oldarbeit;
+ b->child = arbeit;
else if (ISVAL)
oldoldarbeit->next = arbeit;
+ }
oldoldarbeit = oldarbeit;
oldarbeit = arbeit;
index++;
}
- b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */
- break;
+ b->valuelen = c-s + 1;
+ b->type = b->type | opts;
+ if (ISDICT && ISVAL) // e je torej value, če je prej samoten key
+ free_bencoding(oldarbeit); // this key would be otherwise leaked
+ return b;
case 'e': /* end of list/dict */
free(b);
return NULL;
@@ -170,15 +367,15 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) {
}
b->type = string;
if (len == -1 || (b->value = memchr(s, ':', len))) {
- b->valuelen = strtol(s, NULL, 10);
- b->value++;
+ b->valuelen = strtol(s, &c, 10);
+ b->value = c+1;
if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */)
b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */
}
break;
}
if (opts & terminate) {
- if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */
+ if (len != -1 && b->valuelen+1+(b->value-s) > (unsigned) len) { /* no space for terminator, put it on last char */
b->oldterminatorls = b->value[b->valuelen-1];
b->value[b->valuelen-1] = '\0';
} else {
diff --git a/src/utils.c b/src/utils.c
deleted file mode 100644
index 237f701..0000000
--- a/src/utils.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define free_char_after __attribute__ ((__cleanup__ (free_char_pointer)))
-void free_char_pointer (char ** p) {
- free(*p);
-}
diff --git a/utils/bencoding.c b/utils/bencoding.c
new file mode 100644
index 0000000..a60e1b3
--- /dev/null
+++ b/utils/bencoding.c
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <error.h>
+#include <signal.h>
+#include <cjson/cJSON.h>
+#include <bencoding.c>
+#define S0(x) (x ? x : "")
+/**
+ * test utility: reads bencoded data from stdin, bdecodes it and prints its JSON representation to stdout.
+ *
+ * exit status: 1 on wrong usage, 2 when memory can not be obtained or the input does not fit, 3 for the unimplemented encode mode, 4 when b2json and b2json_length disagree.
+ *
+ * @param argc [in] must be 2
+ * @param argv [in] argv[1] selects the mode; a first letter of e means encode, anything else decodes
+ */
+int main (int argc, char ** argv) {
+	if (argc != 1+1)
+		error_at_line(1, 0, __FILE__, __LINE__, "%s encode < json || %s decode < bencoding", S0(argv[0]), S0(argv[0]));
+	if (argv[1][0] == 'e') /* refuse before buffering all of stdin */
+		error_at_line(3, 0, __FILE__, __LINE__, "N/I");
+	size_t size = 2048;
+	size_t len = 0;
+	char * in = malloc(size);
+	if (!in)
+		error_at_line(2, 0, __FILE__, __LINE__, "heap alloc failed");
+	for (;;) {
+		/* append behind what was already read instead of overwriting the start of the buffer */
+		size_t got = fread(in+len, 1, size-len-1, stdin);
+		if (!got) /* end of input or read error: decode what has arrived so far */
+			break;
+		len += got;
+		if (size-len < 1024) {
+			if (size > INT_MAX/2) /* bdecode takes the length as an int */
+				error_at_line(2, 0, __FILE__, __LINE__, "input too large");
+			char * bigger = realloc(in, size*2);
+			if (!bigger)
+				error_at_line(2, 0, __FILE__, __LINE__, "heap alloc failed");
+			in = bigger;
+			size *= 2;
+		}
+	}
+	/* hand over the number of bytes read, not the capacity, so no uninitialised memory is parsed */
+	struct bencoding * bencoding = bdecode(in, (int) len, terminate);
+	int jsonlen = b2json_length(bencoding);
+	if (jsonlen < 0)
+		error_at_line(4, 0, __FILE__, __LINE__, "b2json_length returned %d", jsonlen);
+	char * out = malloc((size_t) jsonlen + 1); /* heap instead of a VLA: the size depends on untrusted input */
+	if (!out)
+		error_at_line(2, 0, __FILE__, __LINE__, "heap alloc failed");
+	char * end = b2json(out, bencoding);
+	*end = '\0';
+	puts(out);
+	if (end - out != jsonlen)
+		error_at_line(4, 0, __FILE__, __LINE__, "b2json wrote %td instead of %d bytes.", end-out, jsonlen);
+	fprintf(stderr, "len: %d\n", jsonlen);
+	free(out);
+	free_bencoding(bencoding);
+	free(in);
+	return 0;
+}