author: sijanec <anton@sijanec.eu> 2021-04-01 23:30:37 +0200
committer: sijanec <anton@sijanec.eu> 2021-04-01 23:30:37 +0200
commit: 579048eaf89784ec1da8592d96311fafd49aea1a (patch)
tree: 61bf0c50c656f2b16ed8901ec3b07fb468ffb916
download: sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.gz
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.bz2
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.lz
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.xz
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.zst
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.zip
12 files changed, 598 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a46c53
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+sear.c
+tmp/
+valgrind-out.txt
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a5f5a31
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+# Build, install and test rules for sear.c.
+# DESTDIR is prepended to the installation path by the install target.
+DESTDIR=/
+
+# None of the targets below produces a file of the same name.
+.PHONY: default install distclean clean prepare test-http test-http-valgrind
+
+# Converts the HTML template into a C array (tmp/hp.xxd, with a trailing 0 byte
+# appended) and compiles the single translation unit src/main.c.
+default:
+	mkdir -p tmp
+	xxd -i < src/hp.html > tmp/hp.xxd
+	echo ', 0' >> tmp/hp.xxd
+	gcc -Wall -pedantic -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c
+
+install:
+	mkdir -p $(DESTDIR)/usr/bin/
+	cp sear.c $(DESTDIR)/usr/bin/
+
+distclean:
+	rm -rf sear.c tmp
+
+clean:
+	rm -rf sear.c tmp
+
+# Installs the build dependencies on Debian-like systems.
+prepare:
+	sudo apt install libmicrohttpd-dev xxd build-essential libxml2-dev -y
+
+test-http:
+	mkdir -p tmp
+	gcc -Wall -pedantic -g -Isrc -Itmp test/nanohttp.c $$(xml2-config --libs --cflags) -otmp/nanohttp
+
+# Runs the binary built by test-http, so build it first.
+test-http-valgrind: test-http
+	valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt tmp/nanohttp http://sijanec.eu/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b7bbb81
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+# sear.c
+
+**not implemented! check back again in a couple of days (:**
+
+sear.c is used as a lightweight replacement for [SearX](//en.wikipedia.org/wiki/Searx) that proxies and caches search results from
+the Google web search engine. The main advantages over SearX are speed and simplicity.
+
+## instructions for debian and ubuntu systems
+
+First add my software distribution repository [prog.sijanec.eu](//prog.sijanec.eu) into your APT sources list.
+
+```
+apt install sear.c
+service sear.c start
+```
+
+## requirements
+
+* a POSIX system
+* GNU C library
+* GNU compiler collection (it's written in GNU C - it uses anonymous functions)
+* GNU Make
+* libxml2-dev (for the simple HTTP/1.0 client and HTML parser)
+* libmicrohttpd-dev (for serving results - use a reverse proxy, such as nginx, for HTTPS)
+* xxd (for converting HTML pages into C arrays when compiling from source)
+
+## compiling from source
+
+```
+make prepare
+make
+```
+
+## a word about public instances
+
+It's not recommended to run a public instance of sear.c yet, because Google tries to prevent spam by presenting a captcha.
+
+I plan to write a simple captcha forwarder, so that users of a public instance can solve the captchas that are presented and thereby keep the instance healthy.
+
+Currently it's recommended to run private instances, protected by a password using a reverse HTTP proxy.
diff --git a/src/api.c b/src/api.c
new file mode 100644
index 0000000..ae8d619
--- /dev/null
+++ b/src/api.c
@@ -0,0 +1,163 @@
+/*
+ * Convenience wrappers around sc_api() and sc_capix(). The fourth argument they pass (isfmt)
+ * expands to 0 when no variadic arguments are given and to 01 when at least one is given, so
+ * the endpoint is only treated as a printf format when there is something to format.
+ * These rely on __VA_OPT__, which the compiler has to support.
+ */
+#define SC_CAPI(c, b, h, e, ...) sc_api(c, b, h, 0##__VA_OPT__(1), e __VA_OPT__(,) __VA_ARGS__)
+#define SC_CAPIX(c, b, h, e, ...) sc_capix(c, b, h, 0##__VA_OPT__(1), e __VA_OPT__(,) __VA_ARGS__)
+/*
+ * Performs an HTTP request with the libxml2 nanohttp client and returns the response body.
+ *
+ * c         cache used for logging, must not be NULL
+ * body      request body; when not NULL the request is a POST, otherwise a GET
+ * headers   extra request headers appended to SC_HTTP_HEADERS, or NULL for none
+ * isfmt     when nonzero and endpoint contains conversions, endpoint is a printf format for the variadic arguments
+ * endpoint  URL (or URL format) to request, must not be NULL
+ *
+ * returns a NUL-terminated string that the caller must free, or NULL when the arguments are invalid,
+ * memory runs out or the connection can't be made. a non-2xx status code or a read error is logged,
+ * but whatever was received up to that point is still returned.
+ */
+char * sc_api (struct sc_cache * c, char * body, char * headers, int isfmt, char * endpoint, ...) {
+	if (!c || !endpoint)
+		return NULL;
+	/* everything that the cleanup section releases starts out as NULL */
+	char * endpoint_formatted = NULL;
+	char * hedrs = NULL;
+	char * contentType = NULL;
+	char * redir = NULL;
+	char * buf = NULL;
+	char * toreturn = NULL;
+	char * url = endpoint;
+	void * r = NULL;
+	size_t buf_sizeof = SC_HTTP_RBUFSIZE;
+	size_t buf_length = 0;
+	int readstatus = 0;
+	long response_code = 0;
+	if (isfmt && parse_printf_format(endpoint, 0, NULL) > 0) {
+		va_list ap, ap2;
+		va_start(ap, endpoint);
+		va_copy(ap2, ap);
+		int strlenm = vsnprintf(NULL, 0, endpoint, ap);
+		if (strlenm >= 0 && (endpoint_formatted = malloc((size_t) strlenm+1)))
+			vsnprintf(endpoint_formatted, (size_t) strlenm+1, endpoint, ap2);
+		va_end(ap);
+		va_end(ap2);
+		if (!endpoint_formatted) {
+			SC_LOG(SC_LOG_ERROR, c, "formatting the endpoint failed");
+			goto rc;
+		}
+		url = endpoint_formatted;
+	}
+	if (!headers)
+		headers = "";
+	hedrs = malloc(strlen(SC_HTTP_HEADERS)+strlen(headers)+1);
+	buf = malloc(buf_sizeof);
+	if (!hedrs || !buf) {
+		SC_LOG(SC_LOG_ERROR, c, "out of memory, endpoint: %s", url);
+		goto rc;
+	}
+	strcpy(hedrs, SC_HTTP_HEADERS);
+	strcat(hedrs, headers);
+	r = xmlNanoHTTPMethodRedir(
+			url,
+			body ? "POST" : "GET",
+			body,
+			&contentType,
+			&redir,
+			hedrs,
+			body ? strlen(body) : 0
+			);
+	if (!r) { /* no response at all: report failure instead of handing out an unfilled buffer */
+		SC_LOG(SC_LOG_ERROR, c, "!r, endpoint: %s", url);
+		goto rc;
+	}
+	response_code = xmlNanoHTTPReturnCode(r);
+	if (response_code < 200 || response_code >= 300)
+		SC_LOG(SC_LOG_ERROR, c, "response_code == %ld, endpoint: %s", response_code, url);
+	/* one byte is always kept in reserve for the terminating NUL */
+	while ((readstatus = xmlNanoHTTPRead(r, buf+buf_length, buf_sizeof-buf_length-1)) > 0) {
+		buf_length += readstatus;
+		if (buf_sizeof-buf_length < SC_HTTP_RBUFSIZE) {
+			size_t bigger = buf_sizeof*SC_REALLOC_K;
+			char * grown = realloc(buf, bigger);
+			if (!grown) { /* buf is still valid here and gets released below */
+				SC_LOG(SC_LOG_ERROR, c, "out of memory, endpoint: %s", url);
+				goto rc;
+			}
+			buf = grown;
+			buf_sizeof = bigger;
+		}
+	}
+	if (readstatus == -1)
+		SC_LOG(SC_LOG_ERROR, c, "readstatus == -1, endpoint: %s", url);
+	buf[buf_length] = '\0'; /* callers treat the body as a C string */
+	toreturn = buf;
+	buf = NULL; /* ownership moves to the caller */
+	SC_LOG(SC_LOG_DEBUG, c, "contentType = %s, redir = %s", contentType ? contentType : "NULL", redir ? redir : "NULL");
+rc:
+	if (r)
+		xmlNanoHTTPClose(r);
+	free(buf);
+	free(endpoint_formatted);
+	free(contentType);
+	free(redir);
+	free(hedrs);
+	return toreturn;
+}
+/*
+ * Same as sc_api(), but parses the downloaded body as HTML and returns the document.
+ *
+ * the arguments have the same meaning as in sc_api(). the endpoint is formatted here and handed to
+ * sc_api() with isfmt set to 0, so percent signs in the final URL are not interpreted a second time.
+ *
+ * returns a document that the caller must release with xmlFreeDoc(), or NULL on any failure.
+ */
+htmlDocPtr sc_capix (struct sc_cache * c, char * body, char * headers, int isfmt, char * endpoint, ...) {
+	if (!c || !endpoint)
+		return NULL;
+	char * endpoint_formatted = NULL;
+	if (isfmt && parse_printf_format(endpoint, 0, NULL) > 0) {
+		va_list ap, ap2;
+		va_start(ap, endpoint);
+		va_copy(ap2, ap);
+		int strlenm = vsnprintf(NULL, 0, endpoint, ap);
+		if (strlenm >= 0 && (endpoint_formatted = malloc((size_t) strlenm+1)))
+			vsnprintf(endpoint_formatted, (size_t) strlenm+1, endpoint, ap2);
+		va_end(ap);
+		va_end(ap2);
+		if (!endpoint_formatted)
+			return NULL;
+	}
+	char * url = endpoint_formatted ? endpoint_formatted : endpoint;
+	htmlDocPtr htmldoc = NULL;
+	char * buf = sc_api(c, body, headers, 0, url);
+	if (buf) /* nothing to parse when the download failed */
+		htmldoc = parseHtmlDocument(buf, url);
+	free(buf);
+	free(endpoint_formatted);
+	return htmldoc;
+}
+/*
+ * Finds the name of the CSS class whose rule body is definition.
+ *
+ * haystack    text (HTML with inline CSS) to search in
+ * definition  the rule body to look for, for example "{color:#70757a}"
+ *
+ * the class name is taken to start right after the nearest '.' that precedes the first occurrence of
+ * definition and to consist of alphanumeric characters only.
+ *
+ * returns a newly allocated string that the caller must free, or NULL when an argument is NULL, the
+ * definition or a preceding '.' is not found, the name runs to the end of haystack, or memory runs out.
+ */
+char * sc_find_class (char * haystack, const char * definition) { /* you must free class after calling */
+	if (!haystack || !definition)
+		return NULL;
+	char * class = strstr(haystack, definition);
+	if (!class)
+		return NULL;
+	/* walk backwards until class points right after a '.' */
+	while (class > haystack && class[-1] != '.')
+		class--;
+	if (class == haystack) /* reached the beginning without seeing a '.' */
+		return NULL;
+	/* google only has alphanumeric class names. TODO: be pedantic and conformic to w3 stds */
+	size_t length = 0;
+	while (isalnum((unsigned char) class[length])) /* the cast keeps bytes above 127 defined for ctype */
+		length++;
+	if (!class[length]) /* the name was not terminated by anything */
+		return NULL;
+	char * toreturn = malloc(length+1);
+	if (!toreturn)
+		return NULL;
+	memcpy(toreturn, class, length);
+	toreturn[length] = '\0';
+	return toreturn;
+}
+/*
+ * Queries google for s and prints the links of the result titles to stdout.
+ *
+ * s  the query string, it is urlencoded here
+ * c  cache used for logging
+ *
+ * returns 1 on success, -1 on invalid arguments, -2 when the download failed, -3 when the title class
+ * was not found, -4 when memory ran out, -5 when the HTML could not be parsed and -6 when the XPath
+ * query could not be evaluated.
+ */
+int sc_query_google (char * s, struct sc_cache * c) {
+	/*
+		remarks:
+			* we are using wap.google.com over HTTP and with a user-agent string of a nokia mobile phone, so we get a lite website
+			* we determine which class holds a specific value by looking at the css definitions
+				- result title: the only class that has definition {color:#1967D2;font-size:14px;line-height:16px}
+					+ A links have this class set, but they have a child SPAN element that then holds the text of the title
+					+ A href points to a tracking relative link, starting with /url?q=. the q parameter contains the (obv urlencoded) link.
+				- result date: class has only one definition, {color:#70757a}, but same definition has the class for the settings A link.
+					+ extract those two classes and find the one that is only present on SPAN text elements.
+				- result description: once we have the result div, the description is the // span with the appropriate class
+					+ the appropriate class is the only one with {word-break:break-word}. note that this class also describes other elements.
+				- result div: to get the result div, we need the parent of the parent of the A link of the title.
+			* result dates are sometimes relative ("an hour ago") and heavily depend on the client location, based on IP.
+				- we won't parse those yet
+			* I couldn't find anything with ratings, so we won't parse those either yet
+			* captcha: google knows that this nokia phone we're pretending to be doesn't support javascript
+				- the request limiting captcha must work on a phone without javascript. it is probably loaded inside an iframe, but has
+					origin protection, so we can't just solve it client-side. we would have to proxy images and create some sort of a session
+					based http request-response based user interface so we can ask the user to complete the captcha. this is not yet
+					implemented and will be hard work.
+	*/
+	if (!s || !c)
+		return -1;
+	int rs = 1;
+	/* everything released at rc starts out as NULL, so jumping there is safe from any point */
+	char * us = NULL;
+	char * txtdoc = NULL;
+	char * titleclass = NULL;
+	char * xpath = NULL;
+	size_t xpath_sizeof = 0;
+	htmlDocPtr xmldoc = NULL;
+	xmlXPathObjectPtr nodes = NULL;
+	us = calloc(strlen(s)*3+1, sizeof(char)); /* zero filled, so the encoded query is terminated in any case */
+	if (!us) {
+		rs = -4;
+		goto rc;
+	}
+	urlencode(us, s);
+	txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us);
+	// fprintf(stdout, "%s\n", txtdoc);
+	if (!txtdoc) {
+		rs = -2;
+		goto rc;
+	}
+	titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
+	if (!titleclass) {
+		SC_LOG(SC_LOG_ERROR, c, "!titleclass");
+		rs = -3;
+		goto rc;
+	}
+#define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* matches for example @class='fuLhoc ZWRArf' */
+	xpath_sizeof = strlen(titleclass)+sizeof(SC_GTXF);
+	xpath = malloc(xpath_sizeof);
+	if (!xpath) {
+		rs = -4;
+		goto rc;
+	}
+	snprintf(xpath, xpath_sizeof, SC_GTXF, titleclass); /* whenever the class attribute contains titleclass */
+	fprintf(stdout, "%s\n", xpath);
+	xmldoc = parseHtmlDocument(txtdoc, NULL);
+	if (!xmldoc) {
+		SC_LOG(SC_LOG_ERROR, c, "!xmldoc");
+		rs = -5;
+		goto rc;
+	}
+	nodes = findNodes(xmldoc, xpath);
+	if (!nodes) {
+		SC_LOG(SC_LOG_ERROR, c, "!nodes");
+		rs = -6;
+		goto rc;
+	}
+	if (!xmlXPathNodeSetIsEmpty(nodes->nodesetval))
+		eachNode(nodes, printLinkNode, NULL);
+rc:
+	if (nodes) /* the XPath result refers to nodes of xmldoc, so it goes first */
+		xmlXPathFreeObject(nodes);
+	if (xmldoc)
+		xmlFreeDoc(xmldoc);
+	free(us);
+	free(txtdoc);
+	free(titleclass);
+	free(xpath);
+	return rs;
+}
diff --git a/src/hp.html b/src/hp.html
new file mode 100644
index 0000000..64da49d
--- /dev/null
+++ b/src/hp.html
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang=sl>
+	<!-- this file is a printf format. be sure to escape percent signs with percent percent. -->
+	<!-- this format requires the following types (in order): query string, query string, result info string, results html string --> 
+	<head>
+		<meta charset=UTF-8 />
+		<title>
+			%s :: sear.c
+		</title>
+		<link rel=stylesheet href=//sijanec.eu/assets/css/styles.css?ref=sear.c /> <!-- TODO: direktno vstavljanje v dokument -->
+		<link rel="shortcut icon" href="data:image/x-icon;," type="image/x-icon"> <!-- prevents favicon lookups -->
+		<link rel="icon" href="data:;base64,iVBORw0KGgo=">
+		<style>
+			input[type=password], input[type=text], input[type=submit], input[type=button] {
+				width: 100%%;
+				height: 1.5cm;
+				font-size: 18px;
+			}
+			input .125 {
+				width: 125%%;
+			}
+			input .50 {
+				width: 50%%;
+			}
+			.result:hover {
+				background: var(--bgc2);
+			}
+		</style>
+	</head>
+	<body>
+		<form>
+			<!-- the value of the text input is the second format argument, the query string. the caller has to HTML-escape it. -->
+			<input type=text name=q class=50 value="%s" placeholder="sear.c ..." />
+			<input type=submit class=125 value=🔍 /> <!-- magnifying glass emoji -->
+			<input type=submit class=125 name=f value=Ʊ /> <!-- horseshoe unicode character -->
+			<input type=submit class=125 name=i value=🖼 /> <!-- framed picture emoji -->
+			<input type=submit class=125 name=v value=🎬 /> <!-- that thing they use in movies to denote start of a scene emoji -->
+		</form>
+		<h3>
+			%s
+		</h3>
+		%s
+		<hr>
+		<h4 align=center>
+			<a href=//git.sijanec.eu/sijanec/sear.c >
+				sear.c
+			</a>
+		</h4>
+	</body>
+</html>
diff --git a/src/i18n.h b/src/i18n.h
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/i18n.h
diff --git a/src/lib.c b/src/lib.c
new file mode 100644
index 0000000..2c3e34a
--- /dev/null
+++ b/src/lib.c
@@ -0,0 +1,33 @@
+/*
+ * Parses the NUL-terminated HTML text d in recovering mode, with parser warnings and errors silenced.
+ *
+ * d  the document text
+ * b  base url of the document, NULL is treated as an empty string
+ *
+ * returns the parsed document, which the caller releases with xmlFreeDoc(), or NULL when d is NULL,
+ * no parser context could be created or parsing failed.
+ */
+static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
+	if (!d)
+		return NULL;
+	if (!b)
+		b = "";
+	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
+	if (!parser_context)
+		return NULL;
+	htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, strlen(d), b, NULL /* encoding */, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR  | HTML_PARSE_RECOVER);
+	htmlFreeParserCtxt(parser_context);
+	return document;
+}
+/*
+ * Evaluates the XPath expression xpath_query against document.
+ *
+ * returns the result object, which the caller releases with xmlXPathFreeObject(), or NULL when an
+ * argument is NULL, no XPath context could be created or the expression could not be evaluated.
+ */
+static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
+	if (!document || !xpath_query)
+		return NULL;
+	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
+	if (!xpath_ctx)
+		return NULL;
+	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
+	xmlXPathFreeContext(xpath_ctx);
+	return nodes;
+}
+/* callback invoked by eachNode() for every node, data is passed through unchanged */
+typedef void (*node_function_t)(xmlNodePtr node, void * data);
+/*
+ * Calls f(node, data) for every node in the node set of the XPath result nodes, in order.
+ * does nothing when nodes or f is NULL or when the result holds no nodes.
+ */
+static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+	if (!nodes || !f || xmlXPathNodeSetIsEmpty(nodes->nodesetval))
+		return;
+	xmlNodeSetPtr nodeset = nodes->nodesetval;
+	int size = nodeset->nodeNr;
+	for (int i = 0; i < size; i++)
+		f(nodeset->nodeTab[i], data);
+}
+/*
+ * node_function_t callback: prints the href attribute of an element node to stdout.
+ * nodes that are not elements or that have no href attribute are skipped. data is not used.
+ */
+void printLinkNode(xmlNodePtr node, void * data) {
+	(void) data;
+	if (!node || node->type != XML_ELEMENT_NODE)
+		return;
+	xmlChar * href = xmlGetProp(node, BAD_CAST "href"); /* a copy that we own, NULL when there is no such attribute */
+	if (!href)
+		return;
+	printf("-> Link to '%s'\n", (const char *) href);
+	xmlFree(href);
+}
+
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..d229512
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,60 @@
+/* Maps a log level (one of the SC_LOG_* values) to its name, any other value maps to "SC_LOG_UNKNOWN". */
+const char * sc_log_str (int t) {
+	if (t == SC_LOG_ERROR)
+		return "SC_LOG_ERROR";
+	if (t == SC_LOG_WARNING)
+		return "SC_LOG_WARNING";
+	if (t == SC_LOG_INFO)
+		return "SC_LOG_INFO";
+	if (t == SC_LOG_DEBUG)
+		return "SC_LOG_DEBUG";
+	return "SC_LOG_UNKNOWN";
+}
+/*
+ * Releases a log entry together with its message.
+ * returns 1 on success and -1 when l is NULL, like the other _free functions.
+ */
+int sc_logentry_free (struct sc_logentry * l) {
+	if (!l)
+		return -1;
+	free(l->message); l->message = NULL;
+	free(l);
+	return 1;
+}
+/* Allocates a zero-filled log entry, returns NULL when the allocation fails. */
+struct sc_logentry * sc_logentry_init () {
+	return calloc(1, sizeof(struct sc_logentry));
+}
+/*
+ * Appends an entry to the log of cache c and prints it to stderr. Normally called through SC_LOG().
+ *
+ * t    log level, one of the SC_LOG_* values
+ * c    cache that owns the log
+ * ca   name of the calling function
+ * f    name of the source file of the call
+ * l    line number of the call
+ * isf  when nonzero and m contains conversions, m is a printf format for the variadic arguments
+ * m    the message or message format
+ *
+ * returns 1 on success, -1 when c or m is NULL, -2 when the cache has no log lock, -3 when locking failed,
+ * -4 when unlocking failed and -5 when there was no room or memory for the new entry.
+ */
+int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f, size_t l, unsigned short int isf, char * m, ...) {
+#define SC_PLL c->logentries[c->logentries_length-1]
+	if (!c || !m)
+		return -1;
+	pthread_rwlock_t * lock = c->logentries_lock;
+	if (!lock)
+		return -2;
+	if (pthread_rwlock_wrlock(lock))
+		return -3;
+	int rs = 1;
+	char * message = NULL;
+	if (c->logentries_sizeof - c->logentries_length == 0) { /* grow only when every slot is taken */
+		SC_BIGGER_ARRAY(c->logentries, sc_logentry);
+	}
+	if (!c->logentries || c->logentries_length >= c->logentries_sizeof || !c->logentries[c->logentries_length]) {
+		rs = -5; /* growing failed or the next slot was never allocated */
+		goto unlock;
+	}
+	if (isf && parse_printf_format(m, 0, NULL) > 0) {
+		va_list ap, ap2;
+		va_start(ap, m);
+		va_copy(ap2, ap);
+		int strlenm = vsnprintf(NULL, 0, m, ap);
+		if (strlenm >= 0 && (message = malloc((size_t) strlenm+1)))
+			vsnprintf(message, (size_t) strlenm+1, m, ap2);
+		va_end(ap);
+		va_end(ap2);
+	} else {
+		message = malloc(strlen(m)+1);
+		if (message)
+			strcpy(message, m);
+	}
+	if (!message) {
+		rs = -5;
+		goto unlock;
+	}
+	c->logentries_length++; /* from here on SC_PLL is the new entry */
+	SC_PLL->type = t;
+	SC_PLL->message = message;
+	SC_PLL->file = f;
+	SC_PLL->line = l;
+	SC_PLL->function = ca;
+	SC_PLL->time = time(NULL);
+	fprintf(stderr, "[sear.c] %s %s()@%s:%zu: %s\n", sc_log_str(t), ca, f, l, SC_PLL->message); /* in posix, this is thread safe */
+unlock:
+	if (pthread_rwlock_unlock(lock))
+		return -4;
+	return rs;
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..c42d5c0
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <printf.h>
+#include <ctype.h>
+#include <time.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <i18n.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <libxml/nanohttp.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+#include <libxml/tree.h>
+#include <libxml/xpath.h>
+#include <lib.c>
+#include <url.c>
+unsigned char sc_hp[] = { /* html page null terminated format string, from file src/hp.html */
+#include <hp.xxd>
+};
+#define SC_HTTP_PORT 7327 /* SEAR on mobile keyboard */
+#define SC_HTTP_RBUFSIZE 4096 /* initial size of the http read buffer, it then grows by a factor of SC_REALLOC_K */
+#define SC_HTTP_USER_AGENT "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" /* so google and others send a minimal response */
+#define SC_HTTP_HEADERS "User-Agent: " SC_HTTP_USER_AGENT "\r\n"
+#include <structs.c>
+#include <log.c>
+#include <api.c>
+/* naming convention, new in my programs: name_sizeof is the number of allocated slots of the array name and name_length is the number of slots in use. allocating more slots than needed decreases the number of calls to realloc and friends */
+/*
+ * Entry point. For now it creates a cache and runs a single hard-coded test query.
+ * returns 0 on success, 1 when the cache could not be created and 2 when the query failed.
+ */
+int main (int argc, char ** argv) {
+	(void) argc; /* no command line arguments are used yet */
+	(void) argv;
+	int rs = 0;
+	struct sc_cache * c = sc_cache_init();
+	if (!c) {
+		rs = 1;
+		goto rc;
+	}
+	if (sc_query_google("slovenia", c) < 0) /* failures are reported as negative values */
+		rs = 2;
+rc:
+	sc_cache_free(c);
+	return rs;
+}
diff --git a/src/structs.c b/src/structs.c
new file mode 100644
index 0000000..fa4228c
--- /dev/null
+++ b/src/structs.c
@@ -0,0 +1,118 @@
+#define SC_ALLOC_CHUNK 128 /* how many x to alloc when allocing (for performance so we don't call malloc over and over again) */
+/* declares, inside a struct, an array of pointers to type called name together with its counters name_sizeof and name_length */
+#define SC_IN_STRUCT_ARRAY(type, name) _Atomic(type **) name; _Atomic(size_t) name##_sizeof; _Atomic(size_t) name##_length
+/*
+ * lock helpers: write-lock, read-lock and unlock the rwlock name. each one evaluates to 0 on success. on failure
+ * it logs an error through SC_LOG and evaluates to 1.
+ * NOTE(review): SC_I18N_LOCKING, SC_I18N_UNLOCKING and SC_I18N_FAILED are not defined in the sources visible
+ * here - confirm that they exist before using these macros.
+ */
+#define SC_CWLE(c, name) (pthread_rwlock_wrlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0)
+#define SC_CRLE(c, name) (pthread_rwlock_rdlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0)
+#define SC_CUE(c, name) (pthread_rwlock_unlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_UNLOCKING " " #name " " SC_I18N_FAILED)||1):0)
+#define SC_REALLOC_K 1.5 /* constant to dynamically realloc large arrays (new size = current size * K) */
+#define SC_ENGINE_GOOGLE (1 << 0)
+/* _Atomic(size_t) sc_mem_max = 100e6; */ /* the really soft memory limit of the program: 100MB - NOT IMPLEMENTED */
+/*
+ * logs message m (a printf format when more arguments follow) with level t into cache c by calling sc_push_log()
+ * with the name of the calling function, the file and the line filled in. the isf argument expands to 0 without
+ * and to 01 with variadic arguments.
+ */
+#define SC_LOG(t, c, m, ...) sc_push_log(t, c, __func__, __FILE__, __LINE__, 0##__VA_OPT__(1), m __VA_OPT__(,) __VA_ARGS__)
+/* log levels, one bit each */
+#define SC_LOG_ERROR		(1 << 0)
+#define SC_LOG_WARNING	(1 << 1)
+#define SC_LOG_INFO			(1 << 2)
+#define SC_LOG_DEBUG		(1 << 3)
+/*
+ * grows the pointer array name (declared with SC_IN_STRUCT_ARRAY) by a factor of SC_REALLOC_K, but by at least
+ * one slot, and fills every new slot with the result of type_init().
+ * the new size is computed once as an integer, so the allocation and the fill loop always agree on it.
+ * when realloc fails, the array and name_sizeof are left untouched, so callers can detect it by checking that
+ * name_sizeof has grown. use it like a statement, with a trailing semicolon.
+ */
+#define SC_BIGGER_ARRAY(name, type) do { \
+		size_t sc_ba_old_ = name##_sizeof; \
+		size_t sc_ba_new_ = sc_ba_old_*SC_REALLOC_K; \
+		if (sc_ba_new_ <= sc_ba_old_) \
+			sc_ba_new_ = sc_ba_old_+1; \
+		void * sc_ba_tmp_ = realloc(name, sizeof(name[0])*sc_ba_new_); \
+		if (sc_ba_tmp_) { \
+			name = sc_ba_tmp_; \
+			for (size_t sc_ba_i_ = sc_ba_old_; sc_ba_i_ < sc_ba_new_; sc_ba_i_++) \
+				name[sc_ba_i_] = type##_init(); \
+			name##_sizeof = sc_ba_new_; \
+		} \
+	} while (0)
+/* one entry of the log that a struct sc_cache keeps. "nofree"/"yesfree" tell whether the entry owns the pointer. */
+struct sc_logentry {
+	unsigned char type; /* SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO, SC_LOG_DEBUG */
+	size_t line; /* line number of the logging call */
+	const char * function; /* nofree - name of the function that logged */
+	char * file; /* nofree - name of the source file of the logging call */
+	char * message; /* yesfree - the formatted message */
+	time_t time; /* when the entry was logged */
+};
+int sc_logentry_free (struct sc_logentry * l); /* defined in log.c */
+struct sc_logentry * sc_logentry_init (); /* defined in log.c */
+
+/* a single search result. "yesfree" members are released by sc_result_free(). */
+struct sc_result {
+	struct sc_query * query; /* nofree - free from sc_cache */
+	char * url; /* yesfree */
+	char * desc; /* yesfree */
+	char * title; /* yesfree */
+	time_t date; /* some search engines like to extract a date from a website, store that here */
+	char * html; /* yesfree - cached generated html output of said result or NULL before it's created */
+	unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implemented */
+	unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */
+};
+/* Allocates a zero-filled result, returns NULL when the allocation fails. */
+struct sc_result * sc_result_init () {
+	return calloc(1, sizeof(struct sc_result));
+}
+/* Releases a result and every string it owns. returns 1 on success and -1 when r is NULL. */
+int sc_result_free (struct sc_result * r) {
+	if (r == NULL)
+		return -1;
+	char * owned[] = { r->url, r->desc, r->title, r->html };
+	for (size_t n = 0; n < sizeof(owned)/sizeof(owned[0]); n++)
+		free(owned[n]);
+	free(r);
+	return 1;
+}
+/* one search query together with its results. created by sc_query_init() and released by sc_query_free(). */
+struct sc_query {
+	struct sc_cache * cache; /* nofree - what cache owns this query */
+	SC_IN_STRUCT_ARRAY(struct sc_result, results); /* yesfree */
+	char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */
+	time_t lookup_time; /* time of last lookup */
+	unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */
+	char * html; /* yesfree - cached generated HTML output of the result or NULL before it's created */
+};
+/*
+ * Allocates a query with SC_ALLOC_CHUNK preallocated results, each pointing back to the query.
+ * returns NULL, with nothing left allocated, when any allocation fails.
+ */
+struct sc_query * sc_query_init () {
+	struct sc_query * q = calloc(1, sizeof(struct sc_query));
+	if (!q)
+		return NULL;
+	q->results = calloc(SC_ALLOC_CHUNK, sizeof(struct sc_result *));
+	if (!q->results) {
+		free(q);
+		return NULL;
+	}
+	q->results_sizeof = SC_ALLOC_CHUNK;
+	for (size_t i = 0; i < q->results_sizeof; i++) {
+		q->results[i] = sc_result_init();
+		if (!q->results[i]) { /* undo everything allocated so far */
+			while (i > 0)
+				sc_result_free(q->results[--i]);
+			free(q->results);
+			free(q);
+			return NULL;
+		}
+		q->results[i]->query = q;
+	}
+	return q;
+}
+/*
+ * Releases a query, its strings, all of its results and the results array itself.
+ * returns 1 on success and -1 when q is NULL.
+ */
+int sc_query_free (struct sc_query * q) {
+	if (!q)
+		return -1;
+	free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */
+	free(q->html); /* setting to NULL here is not necessary, as we'll never use this query struct again */
+	if (q->results)
+		for (size_t i = 0; i < q->results_sizeof; i++)
+			sc_result_free(q->results[i]);
+	free(q->results); /* the array of pointers is owned by the query as well */
+	free(q);
+	return 1;
+}
+/* the root object: holds the queries and the log entries, each array with its own rwlock. created by sc_cache_init() and released by sc_cache_free(). */
+struct sc_cache {
+	SC_IN_STRUCT_ARRAY(struct sc_query, queries); /* yesfree */
+	pthread_rwlock_t * queries_lock; /* allocated and initialized in sc_cache_init() */
+	SC_IN_STRUCT_ARRAY(struct sc_logentry, logentries); /* yesfree */
+	pthread_rwlock_t * logentries_lock; /* allocated and initialized in sc_cache_init(), taken by sc_push_log() */
+};
+/*
+ * Allocates a cache with SC_ALLOC_CHUNK preallocated queries and log entries and with both locks initialized.
+ * returns NULL, with nothing left allocated, when an allocation or the initialization of a lock fails.
+ */
+struct sc_cache * sc_cache_init() {
+	struct sc_cache * c = calloc(1, sizeof(struct sc_cache));
+	if (!c)
+		return NULL;
+	c->queries = calloc(SC_ALLOC_CHUNK, sizeof(struct sc_query *));
+	c->logentries = calloc(SC_ALLOC_CHUNK, sizeof(struct sc_logentry *));
+	c->queries_lock = malloc(sizeof(pthread_rwlock_t));
+	c->logentries_lock = malloc(sizeof(pthread_rwlock_t));
+	if (!c->queries || !c->logentries || !c->queries_lock || !c->logentries_lock)
+		goto fail_memory;
+	if (pthread_rwlock_init(c->queries_lock, NULL))
+		goto fail_memory;
+	if (pthread_rwlock_init(c->logentries_lock, NULL)) {
+		pthread_rwlock_destroy(c->queries_lock);
+		goto fail_memory;
+	}
+	c->queries_sizeof = SC_ALLOC_CHUNK;
+	c->logentries_sizeof = SC_ALLOC_CHUNK;
+	for (size_t i = 0; i < c->queries_sizeof; i++) {
+		c->queries[i] = sc_query_init();
+		if (!c->queries[i])
+			goto fail_entries;
+		c->queries[i]->cache = c;
+	}
+	for (size_t i = 0; i < c->logentries_sizeof; i++) { /* each array is walked with its own size */
+		c->logentries[i] = sc_logentry_init();
+		if (!c->logentries[i])
+			goto fail_entries;
+	}
+	return c;
+fail_entries: /* slots that were never filled are still NULL from calloc */
+	for (size_t i = 0; i < c->queries_sizeof; i++)
+		sc_query_free(c->queries[i]);
+	for (size_t i = 0; i < c->logentries_sizeof; i++)
+		if (c->logentries[i])
+			sc_logentry_free(c->logentries[i]);
+	pthread_rwlock_destroy(c->queries_lock);
+	pthread_rwlock_destroy(c->logentries_lock);
+fail_memory:
+	free(c->queries);
+	free(c->logentries);
+	free(c->queries_lock);
+	free(c->logentries_lock);
+	free(c);
+	return NULL;
+}
+/*
+ * Releases a cache: every query, every log entry, both arrays and both locks.
+ * returns 1 on success and -1 when c is NULL.
+ */
+int sc_cache_free(struct sc_cache * c) {
+	if (!c)
+		return -1;
+	if (c->queries)
+		for (size_t i = 0; i < c->queries_sizeof; i++)
+			sc_query_free(c->queries[i]);
+	free(c->queries);
+	if (c->logentries)
+		for (size_t i = 0; i < c->logentries_sizeof; i++)
+			if (c->logentries[i])
+				sc_logentry_free(c->logentries[i]);
+	free(c->logentries); /* the array of pointers is owned by the cache as well */
+	if (c->queries_lock) {
+		pthread_rwlock_destroy(c->queries_lock);
+		free(c->queries_lock);
+	}
+	if (c->logentries_lock) {
+		pthread_rwlock_destroy(c->logentries_lock);
+		free(c->logentries_lock);
+	}
+	free(c);
+	return 1;
+}
diff --git a/src/url.c b/src/url.c
new file mode 100644
index 0000000..a3a29e7
--- /dev/null
+++ b/src/url.c
@@ -0,0 +1,30 @@
+/*
+ * Percent-encodes the string i into o. Alphanumeric characters and . _ - ~ are copied, every other
+ * byte becomes a percent sign followed by two uppercase hexadecimal digits.
+ *
+ * o  output buffer, the result is always NUL-terminated
+ * i  NUL-terminated input
+ *
+ * returns 1 on success and 0 when an argument is NULL.
+ */
+int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) {
+	static const char hex[] = "0123456789ABCDEF";
+	size_t written = 0;
+	if (!o || !i)
+		return 0;
+	for (; *i; i++) {
+		unsigned char ch = (unsigned char) *i; /* unsigned, so bytes above 127 give exactly two hex digits */
+		if (isalnum(ch) || ch == '.' || ch == '_' || ch == '-' || ch == '~') {
+			o[written++] = (char) ch;
+		} else {
+			o[written++] = '%';
+			o[written++] = hex[ch >> 4];
+			o[written++] = hex[ch & 0x0F];
+		}
+	}
+	o[written] = '\0';
+	return 1;
+}
+/*
+ * Decodes the percent-encoded string i into o.
+ *
+ * o  output buffer, the result is always NUL-terminated
+ * i  NUL-terminated input
+ *
+ * returns 1 on success. returns 0 when an argument is NULL or when a percent sign is not followed by
+ * two hexadecimal digits, in which case o holds what was decoded before the malformed sequence.
+ */
+int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memory allocated */) {
+	size_t written = 0;
+	if (!o || !i)
+		return 0;
+	for (; *i; i++) {
+		if (*i != '%') {
+			o[written++] = *i;
+			continue;
+		}
+		/* the second digit is only looked at when the first one exists, so we never read past the terminator */
+		if (!isxdigit((unsigned char) i[1]) || !isxdigit((unsigned char) i[2])) { /* malformed */
+			o[written] = '\0';
+			return 0;
+		}
+		char buf[] = { i[1], i[2], '\0' };
+		o[written++] = (char) strtol(buf, NULL, 16);
+		i += 2;
+	}
+	o[written] = '\0';
+	return 1;
+}
diff --git a/test/nanohttp.c b/test/nanohttp.c
new file mode 100644
index 0000000..78d9fda
--- /dev/null
+++ b/test/nanohttp.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <libxml/nanohttp.h>
+#define UA "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)"
+/*
+ * Small test program for the libxml2 nanohttp client: requests the URL given as the first argument
+ * with the user agent UA and copies the response body to standard output.
+ * returns 1 when no URL is given, the request can't be made or writing the output fails, otherwise 0.
+ */
+int main (int argc, char ** argv) {
+	if (argc < 1+1) {
+		fprintf(stderr, "%s <URL>\n", argc > 0 ? argv[0] : "nanohttp");
+		return 1;
+	}
+	char * contentType = NULL;
+	char * redir = NULL;
+	int readstatus = 0;
+	int rs = 0;
+	char buf[4096];
+	/* xmlNanoHTTPInit(); */
+	void * r = xmlNanoHTTPMethodRedir(argv[1], "GET", NULL /* inp */, &contentType, &redir, "User-Agent: " UA "\r\n", 0 /* inplen */);
+	if (!r) {
+		fprintf(stderr, "!r\n");
+		free(contentType);
+		free(redir);
+		return 1;
+	}
+	if (xmlNanoHTTPReturnCode(r) != 200)
+		fprintf(stderr, "xmlNanoHTTPReturnCode(r) != 200\n");
+	/* any positive return value is data, a read of a single byte as well */
+	while ((readstatus = xmlNanoHTTPRead(r, buf, sizeof buf)) > 0) {
+		if (write(1, buf, readstatus) < 0) {
+			fprintf(stderr, "write failed\n");
+			rs = 1;
+			break;
+		}
+	}
+	if (readstatus == -1)
+		fprintf(stderr, "readstatus == -1\n");
+	xmlNanoHTTPClose(r);
+	free(contentType);
+	free(redir);
+	/* xmlNanoHTTPCleanup(); */
+	return rs;
+}
author	sijanec <anton@sijanec.eu>	2021-04-01 23:30:37 +0200
committer	sijanec <anton@sijanec.eu>	2021-04-01 23:30:37 +0200
commit	579048eaf89784ec1da8592d96311fafd49aea1a (patch)
tree	61bf0c50c656f2b16ed8901ec3b07fb468ffb916
download	sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.gz sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.bz2 sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.lz sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.xz sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.zst sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.zip