summaryrefslogblamecommitdiffstats
path: root/src/bencoding.c
blob: d1324c75806c50c041821a28b23e900febfb04a6 (plain) (tree)






















                                                                                                                                    



                                                                                                                                                  














                                                                                                                                          

                                                                                          

                                










                                          


































































































































































































                                                                                                                                                                                               

















































                                                                                                                                   
                                                         







                                                     
                                



                                                                                                     


                                                                                                                                     
                                                                                      
                                                                                                                                     


                                                                                   
                               
                      
                         


                                                                





                                                                             

                                                                                                     
                                             



                                                                         

                                             
                                                       
                                                                  

                                                                            
                                 



                                                         




                                                                                                









                                                                                                                                   

                                                                





                                                                                                                                     
                                                                                                                                  















                                                                                     
/**
 * enum of all possible bencoding types and some options to use
 * to check a type, use ORing, not direct comparison, as bdecoded structs inherit opts from bdecode function in their ->types
 */

enum benc {
	string = 1 << 0,
	num = 1 << 1,
	list = 1 << 2,
	dict = 1 << 3,
	terminate = 1 << 4 /**< bencoding strings are terminated and you do not need bencoding_string to use them. breaks input str.
			     note: when out of space, the terminator is placed instead of the last character of the string. **/
};

/**
 * structure representation of bencoded data
 * the structure does not copy any data, it's assumed that the origin string that was used to create the structure does not change
 */

struct bencoding {
	struct bencoding * next; /**< NULL if element is not member of a list or dict */
	struct bencoding * prev;
	struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */
	enum benc type; /**< type | opts of this element */
	struct bencoding * key; /**< the key element, string according to the spec, applicable for dict */
	char * value; /**< set to the content of the element, value is not null terminated unless terminate opt is set. NULL for dict and list. */
	size_t valuelen; /**< length of string value, as value is not null terminated, internal value for list or dict. */
	int intvalue;
	int index;
	char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */
	char oldterminatorls; /**< when opts&terminate when there was no more space, replaced character is stored here.
				if there'd be enough space, the next one of this one would be replaced.
				this is used by bencoding string, as it will repair the original string and restore the last character. */
};

/**
 * frees the passed bencoding struct or performs no action if NULL was passed. caller should NULL the pointer to prevent reuse.
 */

void free_bencoding (struct bencoding * b) {
	if (!b)
		return;
	free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */
	free_bencoding(b->key); /* should this be an element of a dict, free the key */
	free_bencoding(b->next);
	free(b);
	return;
}

/**
 * helper macros for number comparisons
 */

#define MAX(x, y) ((x) >= (y) ? (x) : (y))
#define MIN(x, y) ((x) <= (y) ? (x) : (y))

/**
 * return how much space a character in a string uses
 *
 * @param a	[in]	the character in question
 */

int b2json_charsize (char a) {
	if (a == '"')
		return 2;
	if (a == '\\')
		return 2;
	if (a == '\b')
		return 2;
	if (a == '\f')
		return 2;
	if (a == '\n')
		return 2;
	if (a == '\r')
		return 2;
	if (a == '\t')
		return 2;
	if (a < ' ')
		return 6;
	return 1;
}

/**
 * write a string representation of a character in a JSON string
 *
 * @param dest	[out]	destination
 * @param a	[in]	the character in question
 * @return the destination pointer, incremented for the number of bytes written
 */

char * b2json_charrepr (char * dest, char a) {
	switch (a) {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-truncation"
		case '"':
			strncpy(dest, "\\\"", 2);
			return dest+2;
		case '\\':
			strncpy(dest, "\\\\", 2);
			return dest+2;
		case '\b':
			strncpy(dest, "\\b", 2);
			return dest+2;
		case '\f':
			strncpy(dest, "\\f", 2);
			return dest+2;
		case '\n':
			strncpy(dest, "\\n", 2);
			return dest+2;
		case '\r':
			strncpy(dest, "\\r", 2);
			return dest+2;
		case '\t':
			strncpy(dest, "\\t", 2);
			return dest+2;
		default:
			if (a < ' ') {
				char buf[7];
				sprintf(buf, "\\u00%02x", a);
				strncpy(dest, buf, 6);
				return dest+6;
			} else {
				*dest++ = a;
				return dest;
			}
#pragma GCC diagnostic pop
	}
}


/**
 * get size required for JSON representation of a bencoding struct. terminating NULL byte is not counted, because b2json does not write it. write it yourself.
 *
 * @param b	[in]	bencoding structure of a bdecoded element
 */

int b2json_length (struct bencoding * b) {
	if (!b)
		return 4;
	if (b->type & string) {
		int size = 2;
		if (b->oldterminatorls)
			size += b2json_charsize(b->oldterminatorls) - b2json_charsize('\0');
		for (size_t i = 0; i < b->valuelen; i++)
			size += b2json_charsize(b->value[i]);
		return size;
	}
	if (b->type & num) {
		char buf[512];
		sprintf(buf, "%d", b->intvalue);
		return strlen(buf);
	}
	if (b->type & list) {
		if (!b->child)
			return 2;
		struct bencoding * t = b->child;
		int size = 2 + b2json_length(t);
		while (t->next) {
			t = t->next;
			size += b2json_length(t) + 1;
		}
		return size;
	}
	if (b->type & dict) {
		if (!b->child)
			return 2;
		struct bencoding * t = b->child;
		int size = 3 + b2json_length(t) + b2json_length(t->key);
		while (t->next) {
			t = t->next;
			size += 1 + b2json_length(t) + 1 + b2json_length(t->key);
		}
		return size;
	}
	return 5;
}

/**
 * write json representation of a bencoding struct. does not write terminating nullbyte, b2json_length does not include it in count. add it yourself. should write exactly b2json_length bytes.
 *
 * writes false when struct has an incorrect type and null when NULL pointer is passed, this is in ordnung with b2json_length.
 *
 * @param dest	[in]	destination
 * @param b	[in]	bencoding structure of a bdecoded element
 * @return the destination pointer, incremented for the number of bytes written
 */

char * b2json (char * dest, struct bencoding * b) {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-truncation"
	if (!b) {
		strncpy(dest, "null", 4);
		return dest+4;
	}
	if (b->type & string) {
		*dest++ = '"';
		for (size_t i = 0; i < b->valuelen; i++)
			if (i == b->valuelen-1 && b->oldterminatorls)
				dest = b2json_charrepr(dest, b->oldterminatorls);
			else
				dest = b2json_charrepr(dest, b->value[i]);
		*dest++ = '"';
		return dest;
	}
	if (b->type & num) {
		char buf[512];
		sprintf(buf, "%d", b->intvalue);
		strncpy(dest, buf, strlen(buf));
		return dest+strlen(buf);
	}
	if (b->type & list) {
		if (!b->child) {
			strncpy(dest, "[]", 2);
			return dest+2;
		}
		struct bencoding * t = b->child;
		*dest++ = '[';
		dest = b2json(dest, t);
		while (t->next) {
			t = t->next;
			*dest++ = ',';
			dest = b2json(dest, t);
		}
		*dest++ = ']';
		return dest;
	}
	if (b->type & dict) {
		if (!b->child) {
			strncpy(dest, "{}", 2);
			return dest+2;
		}
		*dest++ = '{';
		struct bencoding * t = b->child;
		dest = b2json(dest, t->key);
		*dest++ = ':';
		dest = b2json(dest, t);
		while (t->next) {
			t = t->next;
			*dest++ = ',';
			dest = b2json(dest, t->key);
			*dest++ = ':';
			dest = b2json(dest, t);
		}
		*dest++ = '}';
		return dest;
	}
	strncpy(dest, "false", 4);
	return dest+4;
#pragma GCC diagnostic pop
}

/**
 * macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content.
 * dereferences structure without checking.
 * resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator.
 *
 * @param stru	[in]	bencoding structure of a bdecoded element
 * @param char	[out]	char * type variable that will contain allocad string. DO NOT ATTEMPT TO FREE; automatic free at return!
 */

#define bencoding_string(stru, char) \
	char = alloca(stru->valuelen+1); \
	snprintf(char, stru->valuelen+1, "%.*s", stru->valuelen, stru->value); \
	if (stru->oldterminatorls) \
		char[stru->valuelen-1] = char[stru->oldterminatorls]; \

/**
 * bdecodes a bencoded structure from a string into a bencoding structure that must be free_bencodinged by the caller.
 * 
 * nonstandard things: this parser allows for dict keys to be of any type, valuekey
 *
 * by default input string is unmodified, unless terminate opt is set.
 *
 * @param len	[in]	* if set to -1, string is assumed to be correct and not NULL terminated, NULLs may be in strings.
 * 				- malicious strings may trigger reads past the end of the buffer, which may lead to undefined
 *				behaviour, crashes (DoS) or leaks of content, stored in memory.
 *				- if opts&terminate, another character will be written after the bencoded structure in memory if
 *				that structure is a string. beware and have space allocated for it!
 * 			* if set to -2, string is assumed to be NULL terminated and no further reading will be done after the NULL.
 * 				- if such terminator breaks an incomplete element, the resulting structure may be incomplete, but
 * 				will be correct - for example valuelen of a misterminated string will correctly be shortened.
 * 			* if set to a positive number, reading will only be allowed up to that many characters.
 * 				- if the input string reads the end and the structure is incomplete, same thing as with -2 happens.
 * 				- if the structure ends cleanly (string length satisfied or end of list, dict or num found),
 * 				processing stops, no mather how many characters of len are left.
 * @param opts	[in]	sets options. do not set the type bits here, this is the same enum as the ->type enum of returned struct.
 * 			opts will be reflected in the ->type of the returning struct. opts will apply to childs of lists&dicts too.
 */

struct bencoding * bdecode (char * s, int len, enum benc opts) {
	if (!s || len < -2 || (len >= 0 && len < 2 /* 2 being the smallest bencoding string */))
		return NULL;
	if (len == -2)
		len = strlen(s);
	struct bencoding * b = calloc(1, sizeof(struct bencoding)); /* SEGV if OOM */
	char * c = NULL;
	switch (s[0]) {
		case 'i': /* num */
			b->type = num;
			b->value = s+1;
			if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */
				b->intvalue = strtol(b->value, &c, 10);
				b->valuelen = c-b->value;
			}
			break;
		case 'd': /* dict */
			b->type = dict;
			__attribute__((fallthrough));
		case 'l': /* list */
			if (!b->type)
				b->type = list;
			c = s+1;
			struct bencoding * arbeit = NULL;
			struct bencoding * oldarbeit = NULL;
			struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */
			int index = 0;
			while (len == -1 || c <= s+len) {  /* s+len is max we are allowed to read */
				if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator)
					c[0] = oldarbeit->oldterminator;
				arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts);
				if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator)
					c[0] = '\0';
				if (!arbeit) /* bdecoding failed or last element */
					break;
#define ISDICT (b->type & dict)
#define ISLIST !ISDICT
#define ISVAL (index % 2)
#define ISKEY !ISVAL
				if (ISDICT && ISVAL)
					arbeit->key = oldarbeit;
				if (arbeit->type & num)
					c = arbeit->value+arbeit->valuelen+1;
				else if (arbeit->type & string)
					c = arbeit->value+arbeit->valuelen;
				else if (arbeit->type & (list | dict))
					c += arbeit->valuelen;
				arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit;
				arbeit->index = ISDICT ? index/2 : index;
				if (ISLIST) {
					if (index)
						oldarbeit->next = arbeit;
					else
						b->child = arbeit;
				}
				if (ISDICT) {
					if (index == 1)
						b->child = arbeit;
					else if (ISVAL)
						oldoldarbeit->next = arbeit;
				}
				oldoldarbeit = oldarbeit;
				oldarbeit = arbeit;
				index++;
			}
			b->valuelen = c-s + 1;
			b->type = b->type | opts;
			if (ISDICT && ISVAL) // e je torej value, če je prej samoten key
				free_bencoding(oldarbeit); // this key would be otherwise leaked
			return b;
		case 'e': /* end of list/dict */
			free(b);
			return NULL;
		default:
			if (!(s[0] >= '0' && s[0] <= '9')) { /* not a string. not checking this would allow DoS for parsing "lx" */
				free(b);
				return NULL;
			}
			b->type = string;
			if (len == -1 || (b->value = memchr(s, ':', len))) {
				b->valuelen = strtol(s, &c, 10);
				b->value = c+1;
				if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */)
					b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */
			}
			break;
	}
	if (opts & terminate) {
		if (len != -1 && b->valuelen+1+(b->value-s) > (unsigned) len) { /* no space for terminator, put it on last char */
			b->oldterminatorls = b->value[b->valuelen-1];
			b->value[b->valuelen-1] = '\0';
		} else {
			b->oldterminator = b->value[b->valuelen];
			b->value[b->valuelen] = '\0';
		}
	}
	b->type = b->type | opts;
	return b;
}

/**
 * returns a pointer to bencoding struct matching bencoding path or NULL if not found
 * 
 * [xxx] specifies xxxth child of a dict or list. if 
 */