author: sijanec <anton@sijanec.eu> 2021-04-03 23:15:48 +0200
committer: sijanec <anton@sijanec.eu> 2021-04-03 23:15:48 +0200
commit: bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4 (patch)
tree: f9960c7a43f7c0e1da6cb8e8656fcbda2129677a
parent: initial commit (diff)
download: sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.gz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.bz2
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.lz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.xz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.zst
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.zip
12 files changed, 453 insertions, 54 deletions
diff --git a/Makefile b/Makefile
index a5f5a31..5db1e09 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,9 @@ default:
 	mkdir tmp -p
 	xxd -i < src/hp.html > tmp/hp.xxd
 	echo ', 0' >> tmp/hp.xxd
-	gcc -Wall -pedantic -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c
+	xxd -i < src/osdd.xml > tmp/osdd.xxd
+	echo ', 0' >> tmp/osdd.xxd
+	gcc -Wall -Wextra -pedantic -Wno-unused-parameter -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c
 
 install:
 	mkdir -p $(DESTDIR)/usr/bin/
@@ -25,3 +27,6 @@ test-http:
 
 test-http-valgrind:
 	valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt tmp/nanohttp http://sijanec.eu/
+
+valgrind:
+	valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./sear.c
diff --git a/README.md b/README.md
index b7bbb81..ad87bc8 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,12 @@
 
 **not implemented! check back again in a couple of days (:**
 
-sear.c is used as a lightweight replacement for [SearX](//en.wikipedia.org/wiki/Searx) that proxies and caches search results from
+sear.c is used as a lightweight replacement for [SearX](https://en.wikipedia.org/wiki/Searx) that proxies and caches search results from
 the Google web search engine. The main advantages over SearX are speed and simplicity.
 
 ## instructions for debian and ubuntu systems
 
-First add my software distribution repository [prog.sijanec.eu](//prog.sijanec.eu) into your APT sources list.
+First add my software distribution repository [prog.sijanec.eu](https://prog.sijanec.eu) into your APT sources list.
 
 ```
 apt install sear.c
diff --git a/src/api.c b/src/api.c
index ae8d619..6ad996e 100644
--- a/src/api.c
+++ b/src/api.c
@@ -107,7 +107,9 @@ char * sc_find_class (char * haystack, const char * definition) { /* you must fr
 	toreturn[endofclass-class] = '\0';
 	return toreturn;
 }
-int sc_query_google (char * s, struct sc_cache * c) {
+struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q) { /* check for cached queries first! */
+	/* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */
+	/* if query is not NULL, it MUST be initialized */
 	/*
 		remarks:
 			* we are using wap.google.com over HTTP and with a user-agent string of a nokia mobile phone, so we get a lite website
@@ -117,7 +119,7 @@ int sc_query_google (char * s, struct sc_cache * c) {
 					+ A href points to a tracking relative link, starting with /url?q=. the q parameter contains the (obv urlencoded) link.
 				- result date: class has only one definition, {color:#70757a}, but same definition has the class for the settings A link.
 					+ extract those two classes and find the one that is only present on SPAN text elements.
-				- result description: once we have the result div, the description is the // span with the appropriate class
+				- result description: once we have the result div, the description is the //table//span with the appropriate class
 					+ the appropriate class is the only one with {word-break:break-word}. note that this class also describes other elements.
 				- result div: to get the result div, we need the parent of the parent of the A link of the title.
 			* result dates are sometimes relative ("an hour ago") and heavily depend on the client location, based on IP.
@@ -129,35 +131,106 @@ int sc_query_google (char * s, struct sc_cache * c) {
 					based http request-response based user interface so we can ask the user to complete the captcha. this is not yet
 					implemeted and will be hard work.
 	*/
-	if (!s || !c)
-		return -1;
-	int rs = 1;
+	int rs;
+	if (!s || !c) {
+		rs = -1;
+		goto rc;
+	}
+	int qwasgiven = 0;
+	if (!q)
+		q = sc_query_init();
+	else
+		qwasgiven++;
 	char * us = malloc(sizeof(char)*strlen(s)*3+1);
 	urlencode(us, s);
+	char * xpath = NULL;
+	char * descclass = NULL;
+	char * titleclass = NULL;
 	char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us);
 	// fprintf(stdout, "%s\n", txtdoc);
 	free(us);
 	if (!txtdoc) {
+		SC_LOG(SC_LOG_ERROR, c, "!txtdoc");
 		rs = -2;
 		goto rc;
 	}
-	char * titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
-	if (!titleclass) {
-		SC_LOG(SC_LOG_ERROR, c, "!titleclass");
+	titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
+	descclass = sc_find_class(txtdoc, "{word-break:break-word}");
+	if (!titleclass || !descclass) {
+		SC_LOG(SC_LOG_ERROR, c, "!titleclass || !descclass");
 		rs = -3;
 		goto rc;
 	}
-#define SC_GTXF "/html/body//a[contains(@class, '%s')]" // @class='fuLhoc ZWRArf'"
-	char * xpath = malloc(strlen(titleclass)+strlen(SC_GTXF));
+#define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* result a */
+#define SC_GTXD "../..//table//span[@class='%s']"
+#define SC_GTR q->results[q->results_length-1]
+	xpath = malloc(strlen(titleclass)+strlen(SC_GTXF));
 	sprintf(xpath, SC_GTXF, titleclass); /* whenever starts with titleclas */
-	fprintf(stdout, "%s\n", xpath);
 	htmlDocPtr xmldoc = parseHtmlDocument(txtdoc, NULL);
-	xmlXPathObjectPtr nodes = findNodes(xmldoc, xpath);
-	eachNode(nodes, printLinkNode, NULL);
-rc:
+	if (qwasgiven) /* as you can see, when q is given, queries will be write-locked for the whole XML processing time! */
+		SC_CWLE(c, c->queries_lock);
+	q->results_length = 0;
+	gnu_code_start;
+	eachNodeX(xmldoc, xpath,
+			lambda(void, (xmlNodePtr node, void * data), 
+				{
+					if (node->type == XML_ELEMENT_NODE) {
+						xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
+						if (href) {
+							char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href");
+							if (!strncmp(hreflink, "/url?q=", strlen("/url?q="))) {
+								hreflink = hreflink+strlen("/url?q=");
+								*strchrnul(hreflink, '&') = '\0';
+								urldecode(hreflink, hreflink);
+							}
+							char * x = malloc(strlen(descclass)+strlen(SC_GTXD));
+							sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */);
+							xmlNodePtr descnode = nthNodeXN(node, x, 0);
+							free(x);
+							if (q->results_sizeof <= q->results_length)
+								SC_BIGGER_ARRAY(q->results, sc_result);
+							q->results_length++;
+							SC_GTR->query = q;
+							SC_GTR->title = (char *) xmlNodeGetContent(node->children);
+							if (!SC_GTR->title) {
+								SC_GTR->title = malloc(strlen(SC_I18N_NO_TITLE)+1);
+								strcpy(SC_GTR->title, SC_I18N_NO_TITLE);
+							}
+							SC_GTR->url = hreflink;
+							if (!SC_GTR->url) {
+								SC_GTR->url = malloc(strlen(SC_I18N_NO_HREFLINK)+1);
+								strcpy(SC_GTR->url, SC_I18N_NO_HREFLINK);
+							}
+							SC_GTR->desc = (char *) xmlNodeGetContent(descnode);
+							if (!SC_GTR->desc) {
+								SC_GTR->desc = malloc(strlen(SC_I18N_NO_DESCRIPTION)+1);
+								strcpy(SC_GTR->desc, SC_I18N_NO_DESCRIPTION);
+							}
+						}
+					}
+				}
+			),
+		NULL);
+	gnu_code_end;
+	q->cache = c;
+	q->lookup_time = time(NULL);
+	q->engines = SC_ENGINE_GOOGLE;
+	q->string = realloc(q->string, strlen(s)+1);
+	strcpy(q->string, s);
+	if (!qwasgiven) {
+		SC_CWLE(c, c->queries_lock);
+		if (c->queries_sizeof <= c->queries_length)
+			SC_BIGGER_ARRAY(c->queries, sc_query);
+		c->queries_length++;
+#define SC_GTQ c->queries[c->queries_length-1]
+		SC_GTQ = q;
+	}
+	SC_CUE(c, c->queries_lock);
 	xmlFreeDoc(xmldoc);
+rc:
 	free(txtdoc);
 	free(titleclass);
+	free(descclass);
 	free(xpath);
-	return rs;
+	return (rs < 0) ? NULL : q;
 }
diff --git a/src/hp.html b/src/hp.html
index 64da49d..d2bc82f 100644
--- a/src/hp.html
+++ b/src/hp.html
@@ -10,30 +10,49 @@
 		<link rel=stylesheet href=//sijanec.eu/assets/css/styles.css?ref=sear.c /> <!-- TODO: direktno vstavljanje v dokument -->
 		<link rel="shortcut icon" href="data:image/x-icon;," type="image/x-icon"> <!-- prevents favicon lookups -->
 		<link rel="icon" href="data:;base64,iVBORw0KGgo=">
+		<link rel="search" type="application/opensearchdescription+xml" href="/osdd.xml">
 		<style>
 			input[type=password], input[type=text], input[type=submit], input[type=button] {
-				width: 100%%;
-				height: 1,5cm;
-				font-size: 18;
-			}
-			input .125 {
-				width: 125%%;
-			}
-			input .50 {
-				width: 50%%;
+				height: 1cm;
+				font-size: 18px;
 			}
 			.result:hover {
 				background: var(--bgc2);
 			}
+			.container {
+				display: flex;
+				flex-direction: row;
+				flex-wrap: nowrap;
+				justify-content: center;
+				align-items: stretch;
+			}
+			input[name=q] {
+				flex-grow: 4;
+			}
+			input[type=submit] {
+				flex-basis: 12.5%%;
+			}
+			.SC_LOG_ERROR {
+				color: red;
+			}
+			.SC_LOG_WARNING {
+				color: orange;
+			}
+			.SC_LOG_INFO {
+				color: lightgreen;
+			}
+			.SC_LOG_DEBUG {
+				color: magenta;
+			}
 		</style>
 	</head>
 	<body>
-		<form>
-			<input type=text name=q class=50 value="{{ query }}" placeholder="sear.c ..." />
-			<input type=submit class=125 value=🔍 /> <!-- magnifying glass emoji -->
-			<input type=submit class=125 name=f value=Ʊ /> <!-- horseshoe unicode character -->
-			<input type=submit class=125 name=i value=🖼 /> <!-- framed picture emoji -->
-			<input type=submit class=125 name=v value=🎬 /> <!-- that thing they use in movies to denote start of a scene emoji -->
+		<form class=container>
+			<input accesskey=4 type=text name=q value="%s" placeholder="sear.c ..." /> <!-- see www.standardaccesskeys.com -->
+			<input type=submit value=🔍 /> <!-- magnifying glass emoji -->
+			<input type=submit name=f value=Ʊ /> <!-- horseshoe unicode character -->
+			<input type=submit name=i value=🖼 hidden=hidden /> <!-- framed picture emoji - img search not implemented -->
+			<input type=submit name=v value=🎬 hidden=hidden /> <!-- that thing they use in movies - vid search N/I -->
 		</form>
 		<h3>
 			%s
diff --git a/src/httpd.c b/src/httpd.c
new file mode 100644
index 0000000..bf5c3d1
--- /dev/null
+++ b/src/httpd.c
@@ -0,0 +1,169 @@
+char * sc_queryhtml (struct sc_query * q) { /* renders the results of q into the sc_hp page template; the returned string is malloc'd and must be freed by the caller; the caller also takes care of locking */
+	size_t resultshtml_written = 0;
+	size_t resultshtml_sizeof = SC_ALLOC_CHUNK;
+	char * resultshtml = malloc(resultshtml_sizeof);
+	resultshtml[0] = '\0';
+	for (size_t i = 0; i < q->results_length; i++) {
+#define SC_HRC(string, wanted) \
+		if (string##_written+wanted >= string##_sizeof) { \
+			string##_sizeof = (string##_written+wanted+1)*SC_REALLOC_K; \
+			string = realloc(string, string##_sizeof); \
+		}
+#define SC_HRF "<div class=result><h4><a href=\"%s\">%s</a></h4><p>%s</p></div>"
+		char * safetitle = htmlspecialchars(q->results[i]->title);
+		char * safebody = htmlspecialchars(q->results[i]->desc);
+		char * safeurl = htmlspecialchars(q->results[i]->url);
+		size_t ws = snprintf(NULL, 0, SC_HRF, safeurl, safetitle, safebody); /* dry run: length of this result's markup */
+		SC_HRC(resultshtml, ws); /* grow the buffer first so the sprintf below always fits */
+		resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, safeurl, safetitle, safebody);
+		free(safetitle);
+		free(safebody);
+		free(safeurl);
+	}
+#define SC_HRS SC_I18N_NUMBER_OF_RESULTS ": %zu | " SC_I18N_QUERY_TIME ": %s"
+	char formatted_time[128];
+	struct tm tm;
+	localtime_r(&q->lookup_time, &tm);
+	strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm);
+	char queryinfo[256];
+	snprintf(queryinfo, 256, SC_HRS, (size_t) q->results_length, formatted_time); /* the cast keeps the argument in step with %zu */
+	char * safequery = htmlspecialchars(q->string);
+	char * response = malloc(strlen((char *) sc_hp)+2*strlen(safequery)+strlen(queryinfo)+strlen(resultshtml)+1); /* +1: explicit room for the terminator instead of relying on the bytes freed by the template's conversion specifiers */
+	sprintf(response, (char *) sc_hp, safequery, safequery, queryinfo, resultshtml); /* template arguments: query, query, heading, body */
+	free(safequery);
+	free(resultshtml);
+	return response;
+}
+char * sc_logshtml (struct sc_cache * c) { /* renders every log entry of c as HTML; returns a malloc'd string the caller must free, or NULL when c has no log array; remember not to report errors here whilst the log lock is held */
+	char * html = malloc(SC_ALLOC_CHUNK);
+	html[0] = '\0';
+	size_t html_written = 0, html_sizeof = SC_ALLOC_CHUNK; /* _sizeof has to mirror the real allocation, SC_HRC bases its growth decision on it */
+	pthread_rwlock_rdlock(c->logentries_lock);
+	if (!c->logentries) {
+		pthread_rwlock_unlock(c->logentries_lock); /* this early exit must release the read lock too, otherwise it stays held after we return */
+		free(html);
+		return NULL;
+	}
+	for (size_t i = 0; i < c->logentries_length; i++) {
+#define SC_HLF "<div class=result id=log%lu>[<span class=%s>%s</span>] %s " \
+		"<a href=\"" SC_I18N_GIT_URL "/src/branch/master/%s#L%lu\">%s()@%s:%lu</a>: %s</div>"
+#define SC_HLA i, \
+		sc_log_str(c->logentries[i]->type), \
+		sc_log_str(c->logentries[i]->type), \
+		formatted_time, \
+		c->logentries[i]->file, \
+		c->logentries[i]->line, \
+		c->logentries[i]->function, /* compile-time burned in values are safe from xss :) */ \
+		c->logentries[i]->file, \
+		c->logentries[i]->line, \
+		safemessage /* ... whereas this might contain < */
+		struct tm tm;
+		char formatted_time[128];
+		localtime_r(&c->logentries[i]->time, &tm);
+		strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm);
+		char * safemessage = htmlspecialchars(c->logentries[i]->message);
+		size_t ws = snprintf(NULL, 0, SC_HLF, SC_HLA); /* dry run: length of this entry's markup */
+		SC_HRC(html, ws); /* macro defined in sc_queryhtml: grows html when ws more bytes would not fit */
+		html_written += sprintf(html+html_written, SC_HLF, SC_HLA);
+		free(safemessage);
+	}
+	pthread_rwlock_unlock(c->logentries_lock);
+	return html;
+}
+int sc_httpd (void * cls, /* request handler registered with MHD_start_daemon in main; cls is the struct sc_cache passed there. answers GET requests only */
+													struct MHD_Connection * connection,
+													const char * url,
+													const char * method,
+													const char * version,
+													const char * upload_data,
+													size_t * upload_data_size,
+													void ** ptr) {
+	struct sc_cache * c = (struct sc_cache *) cls;
+	static int dummy; /* only its address is used: stored in *ptr to mark a connection whose first round was already seen */
+	struct MHD_Response * httpd_response;
+	int ret;
+	if (0 != strcmp(method, "GET"))
+		return MHD_NO; /* unexpected method */
+	if (&dummy != *ptr) {
+		/* the first time only the headers are valid, do not respond in the first round ... */
+		*ptr = &dummy;
+		return MHD_YES;
+	}
+	if (0 != *upload_data_size)
+		return MHD_NO; /* upload data in a GET?! */
+	*ptr = NULL; /* clear context pointer */
+	char * response = NULL;
+	enum MHD_ResponseMemoryMode mhdrmm = MHD_RESPMEM_MUST_FREE; /* default for the malloc'd pages built below; the static texts switch to PERSISTENT */
+	const char * query = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "q");
+	const char * host = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Host");
+	char * location = "//git.sijanec.eu/sijanec/sear.c"; /* only sent when status_code ends up in the 3xx range */
+	char * content_type = "text/html";
+	int status_code = MHD_HTTP_OK;
+	if (!host)
+		host = "";
+	struct sc_query * q = NULL;
+	if (!query) { /* no q argument: serve one of the fixed documents, chosen by the first character after the leading slash */
+		if (url[0] == '/')
+			switch (url[1]) {
+				case 's': /* security.txt */
+				case '.': /* .well-known/security.txt */
+					mhdrmm = MHD_RESPMEM_PERSISTENT;
+					response = sc_securitytxt;
+					content_type = "text/plain";
+					break;
+				case 'r': /* robots.txt */
+					mhdrmm = MHD_RESPMEM_PERSISTENT;
+					response = sc_robotstxt;
+					content_type = "text/plain";
+					break;
+				case 'o': /* osdd.xml - opensearch description document */
+					response = malloc(strlen(sc_osdd)+strlen(host)); /* sc_osdd is a format string; the two bytes of its %s leave room for the terminator */
+					sprintf(response, sc_osdd, host); /* NOTE(review): host is taken from the request header and inserted unescaped - confirm this is acceptable */
+					content_type = "application/opensearchdescription+xml";
+					break;
+				case 'l': /* logs.html */
+					{
+						char * logshtml = sc_logshtml(c);
+						response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_LOGS)+strlen(logshtml ? logshtml : SC_I18N_LOGS_ERROR));
+						sprintf(response, (char *) sc_hp, "", "", SC_I18N_LOGS, logshtml ? logshtml : SC_I18N_LOGS_ERROR);
+						free(logshtml);
+					}
+					break;
+			}
+		if (!response) { /* any other path gets the home page */
+			response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_HP_HEADING)+strlen(SC_I18N_HP_BODY));
+			sprintf(response, (char *) sc_hp, "", "", SC_I18N_HP_HEADING, SC_I18N_HP_BODY);
+		}
+	} else {
+		int already_retried = 0;
+retry:
+		SC_CRLE(c, c->queries_lock);
+		for (size_t i = 0; i < c->queries_length; i++) /* no break: the last cached query with a matching string wins */
+			if (!strcmp(c->queries[i]->string, query))
+				q = c->queries[i];
+		if (q) {
+			response = sc_queryhtml(q); /* MHD_create_response_from_buffer will free response (; */
+			if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f") && q->results_length > 0) { /* f argument present: redirect to the first result */
+				status_code = 307;
+				location = q->results[0]->url; /* NOTE(review): this pointer into the cache is read again after the lock is released below - confirm results cannot be freed meanwhile */
+			}
+			SC_CUE(c, c->queries_lock);
+		} else {
+			SC_CUE(c, c->queries_lock);
+			sc_query_google(query, c, NULL); /* return value is ignored; success is detected by the cache lookup after the retry */
+			if (already_retried++) { /* second miss in a row: give up and show the error page */
+				char * safequery = htmlspecialchars(query);
+				response = malloc(strlen((char*) sc_hp)+strlen(safequery)*2+strlen(SC_I18N_HP_ERROR_HEADING)+strlen(SC_I18N_HP_ERROR_BODY));
+				sprintf(response, (char *) sc_hp, safequery, safequery, SC_I18N_HP_ERROR_HEADING, SC_I18N_HP_ERROR_BODY);
+				free(safequery);
+			} else goto retry;
+		}
+	}
+	httpd_response = MHD_create_response_from_buffer (strlen(response), (void *) response, mhdrmm);
+	MHD_add_response_header(httpd_response, "Content-Type", content_type);
+	if (status_code >= 300 && status_code <= 399)
+		MHD_add_response_header(httpd_response, "Location", location);
+	ret = MHD_queue_response(connection, status_code, httpd_response);
+	MHD_destroy_response(httpd_response);
+	return ret;
+}
diff --git a/src/i18n.h b/src/i18n.h
index e69de29..7335100 100644
--- a/src/i18n.h
+++ b/src/i18n.h
@@ -0,0 +1,18 @@
+#define SC_I18N_NO_TITLE "ni naslova"
+#define SC_I18N_NO_HREFLINK "/? ni hiperpovezave"
+#define SC_I18N_NO_DESCRIPTION "ni opisa"
+#define SC_I18N_HP_HEADING "dobrodošli na prvo stran <code>sear.c</code>"
+#define SC_I18N_HP_BODY "<code>sear.c</code> je program za anonimizacijo in predpomnenje rezultatov spletnih iskalnikov. " \
+												"Za uporabo nekaj vnesite v iskalno vrstico zgoraj in pritisnite gumb za iskanje."
+#define SC_I18N_NUMBER_OF_RESULTS "število zadetkov"
+#define SC_I18N_QUERY_TIME "čas poizvedbe"
+#define SC_I18N_DATETIME_FORMAT "%c"
+#define SC_I18N_UNLOCKING "odklepanje"
+#define SC_I18N_LOCKING "zaklepanje"
+#define SC_I18N_FAILED "ni uspelo"
+#define SC_I18N_HP_ERROR_HEADING "napaka!"
+#define SC_I18N_HP_ERROR_BODY "Pri pridobivanju rezultatov je api klic odvrnil s kodo, ki označuje neuspelo stanje. " \
+															"Preberite <a href=/logs.html>dnevniške zapise</a>."
+#define SC_I18N_LOGS "dnevniški zapisi"
+#define SC_I18N_LOGS_ERROR "napaka pri branju dnevniških datotek"
+#define SC_I18N_GIT_URL "//git.sijanec.eu/sijanec/sear.c"
diff --git a/src/lib.c b/src/lib.c
index 2c3e34a..5c0576e 100644
--- a/src/lib.c
+++ b/src/lib.c
@@ -1,4 +1,4 @@
-static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
+htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
 	if (!b)
 		b = "";
 	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
@@ -6,28 +6,112 @@ static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url *
 	htmlFreeParserCtxt(parser_context);
 	return document;
 }
-static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
+xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) { /* evaluates xpath_query on document; returns the result object (release it with xmlXPathFreeObject) or NULL when evaluation fails or matches nothing */
 	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
 	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
+	if (!nodes || xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { /* nodes is NULL when evaluation itself failed - check before dereferencing */
+		xmlXPathFreeContext(xpath_ctx);
+		xmlXPathFreeObject(nodes); /* tolerates NULL */
+		return NULL;
+	}
+	xmlXPathFreeContext(xpath_ctx);
+	return nodes;
+}
+xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) { /* like findNodes, but evaluates xpath_query relative to node; returns NULL when evaluation fails or matches nothing */
+	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
+	xmlXPathSetContextNode(node, xpath_ctx);
+	xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
+	if (!nodes || xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { /* nodes is NULL when evaluation itself failed - check before dereferencing */
+		xmlXPathFreeContext(xpath_ctx);
+		xmlXPathFreeObject(nodes); /* tolerates NULL */
+		return NULL;
+	}
 	xmlXPathFreeContext(xpath_ctx);
 	return nodes;
 }
-typedef void (*node_function_t)(xmlNodePtr node, void * data);
-static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+typedef void (*node_function_t) (xmlNodePtr node, void * data);
+void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* calls f(node, data) for every entry of nodes->nodesetval, in order; nodes and its node set are dereferenced unchecked. you can instead use the EACHNODE macro */
 	xmlNodeSetPtr nodeset = nodes->nodesetval;
 	int i, size = nodeset->nodeNr;
 	for (i = 0; i < size; i++) {
 		xmlNodePtr cur;
-		cur = (xmlNodePtr)nodeset->nodeTab[i];
+		cur = (xmlNodePtr) nodeset->nodeTab[i];
 		f(cur, data);
 	}
 }
-void printLinkNode(xmlNodePtr node, void * data) {
+void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) { /* evaluates xpath on doc and runs f(node, data) on each match; does nothing when findNodes returns NULL */
+	xmlXPathObjectPtr matches = findNodes(doc, xpath);
+	if (matches) { /* findNodes hands back NULL for an empty result */
+		eachNode(matches, f, data);
+		xmlXPathFreeObject(matches); /* the result object is ours to release */
+	}
+}
+xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) { /* returns the n-th (0-based) node matched by xpath relative to node, or NULL when there is no such match; the node stays owned by its document */
+	xmlXPathObjectPtr nodes = findNodesN(node, xpath);
+	if (!nodes)
+		return NULL;
+	xmlNodeSetPtr nodeset = nodes->nodesetval;
+	xmlNodePtr toreturn = NULL;
+	/* single exit: the XPath object is released on the out-of-range path too; a negative n is rejected instead of indexing before nodeTab */
+	if (n >= 0 && n < nodeset->nodeNr)
+		toreturn = (xmlNodePtr) nodeset->nodeTab[n];
+	xmlXPathFreeObject(nodes);
+	return toreturn;
+}
+#define EACHNODE(node, nodes) /* for-loop header that assigns each entry of nodes->nodesetval->nodeTab to node in turn; runs zero times when nodes or its node set is NULL. you can instead use eachNodeX with an anonymous function - then there is no need to call findNodes and free the result separately */ \
+	for (int EACHNODE_i = 0; \
+			nodes ? nodes->nodesetval ? \
+				((EACHNODE_i < nodes->nodesetval->nodeNr) && (node = (xmlNodePtr)nodes->nodesetval->nodeTab[EACHNODE_i])) \
+			: 0 : 0; \
+			EACHNODE_i++)
+/* // this does not work
+#define EACHNODEX(node, target, xpath) \
+	xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
+		for (size_t EACHNODEX_i = 0; \
+				EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
+					? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
+						&& (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
+					: xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
+				: 0 : 0; \
+				EACHNODEX_i++)
+*/
+void printNode (xmlNodePtr node, void * data) { /* prints the text content of node to stdout when it is an element node; has the node_function_t signature, data is not used */
+	if (data){} /* empty statement that merely mentions data */
 	if (node->type == XML_ELEMENT_NODE) {
-		xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
-		if (href) {
-			printf("-> Link to '%s'\n", xmlGetProp(node, BAD_CAST "href"));
+		printf("-> content: '%s'\n", (char *) xmlNodeGetContent(node)); /* NOTE(review): the string returned by xmlNodeGetContent is never released here - presumably it should go to xmlFree; confirm */
+	}
+}
+#define gnu_code_start /* opens a region in which GCC's -Wpedantic diagnostics are ignored; close it with gnu_code_end */ \
+	_Pragma ("GCC diagnostic push") \
+	_Pragma ("GCC diagnostic ignored \"-Wpedantic\"")
+#define gnu_code_end /* restores the diagnostic state saved by gnu_code_start */ \
+	_Pragma ("GCC diagnostic pop") 
+/* this is the definition of the anonymous function: lambda(return type, (arguments), { body }) expands to a ({ ... }) expression that defines a function named l_anonymous_functions_name and evaluates to its address - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC */
+#define lambda(l_ret_type, l_arguments, l_body)        \
+	({                                                   \
+	 l_ret_type l_anonymous_functions_name l_arguments   \
+	 l_body                                              \
+	 &l_anonymous_functions_name;                        \
+	 })
+char * htmlspecialchars (const char * i) { /* returns a malloc'd copy of i with & < > " replaced by HTML entities, or NULL when out of memory; remember to free the output */
+	size_t s = 128; /* allocated size of o */
+	char * o = malloc(s);
+	size_t w = 0; /* bytes written to o so far */
+	if (!o) return NULL;
+	for (; *i; i++) {
+		if (s - w <= 10) { /* keep headroom: the longest entity (&quot;) takes 6 bytes plus the terminator */
+			char * bigger = realloc(o, (s += s/2)); /* assigning straight to o would leak the old buffer on failure */
+			if (!bigger) { free(o); return NULL; }
+			o = bigger;
+		}
+		switch (*i) {
+			case '&': w += sprintf(o+w, "&amp;"); break; /* without this, input such as "&lt;" would render as "<" */
+			case '<': w += sprintf(o+w, "&lt;"); break;
+			case '>': w += sprintf(o+w, "&gt;"); break;
+			case '"': w += sprintf(o+w, "&quot;"); break;
+			default: o[w++] = *i; break;
 		}
 	}
+	o[w++] = '\0';
+	return o;
 }
-
diff --git a/src/log.c b/src/log.c
index d229512..5e4dc16 100644
--- a/src/log.c
+++ b/src/log.c
@@ -31,7 +31,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f
 		return -2;
 	if (pthread_rwlock_wrlock(lock))
 		return -3;
-	if (c->logentries_sizeof - c->logentries_length != 0)
+	if (c->logentries_sizeof <= c->logentries_length)
 		SC_BIGGER_ARRAY(c->logentries, sc_logentry);
 	c->logentries_length++;
 	size_t strlenm = strlen(m);
@@ -53,6 +53,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f
 	SC_PLL->line = l;
 	SC_PLL->function = ca;
 	SC_PLL->time = time(NULL);
+	SC_PLL->type = t;
 	fprintf(stderr, "[sear.c] %s %s()@%s:%lu: %s\n", sc_log_str(t), ca, f, l, SC_PLL->message); /* in posix, this is thread safe */
 	if (lock && pthread_rwlock_unlock(lock))
 		return -4;
diff --git a/src/main.c b/src/main.c
index c42d5c0..3e8c544 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -16,11 +17,25 @@
 #include <libxml/HTMLtree.h>
 #include <libxml/tree.h>
 #include <libxml/xpath.h>
+#include <microhttpd.h>
 #include <lib.c>
 #include <url.c>
 unsigned char sc_hp[] = { /* html page null terminated format string, from file src/hp.html */
 #include <hp.xxd>
 };
+char sc_osdd[] = { /* xml document for opensearch */
+#include <osdd.xxd>
+};
+char sc_robotstxt[] = "User-Agent: *\nDisallow: /\n";
+char sc_securitytxt[] = "# This content information is provided by the developer of this opensource application.\n"
+	"# The developer is not responsible for the actions of his software\n"
+	"# This website IS NOT operated by the developer. Do not use the contact information below in hopes of contacting the webmaster\n"
+	"# The following contact information is provided for reporting security bugs regarding the software, not for legal issues\n"
+	"Contact: https://www.sijanec.eu/o.html#kontakt\n"
+	"Acknowledgments: https://git.sijanec.eu/sijanec/sear.c\n"
+	"Encryption: https://www.sijanec.eu/pgp-key.txt\n"
+	"Expires: Thu, 31 Dec 2021 18:37:07 -0800\n"
+	"Preferred-Languages: sl, en, de, hr\n";
 #define SC_HTTP_PORT 7327 /* SEAR on mobile keyboard */
 #define SC_HTTP_RBUFSIZE 4096 /* initial size of http read buffer, increasning by K */
 #define SC_HTTP_USER_AGENT "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" /* so google and others sends a minimal response */
@@ -28,16 +43,25 @@ unsigned char sc_hp[] = { /* html page null terminated format string, from file
 #include <structs.c>
 #include <log.c>
 #include <api.c>
+#include <httpd.c>
 /* this is new in my programs. I am now using _sizeof for the actual alloced size of the array and _length for the count of elements in array. this is done to decrease number of calls to realloc&amis */
 int main (int argc, char ** argv) {
 	int rs = 0;
 	struct sc_cache * c = sc_cache_init();
+	struct MHD_Daemon * d = NULL; /* stays NULL until the daemon is really running, so the cleanup at rc can tell; exit codes: 1 = no cache, 2 = no daemon */
 	if (!c) {
 		rs = 1;
 		goto rc;
 	}
-	sc_query_google("slovenia", c);
+	d = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION, SC_HTTP_PORT, NULL, NULL, &sc_httpd, c, MHD_OPTION_END); /* c is handed to sc_httpd as its cls argument */
+	if (!d) {
+		rs = 2;
+		goto rc;
+	}
+	/* sc_query_google(argv[1], c); */
+	getc(stdin); /* keep serving until a byte or EOF arrives on stdin */
 rc:
+	if (d) MHD_stop_daemon(d); /* stop the request handlers before the cache they use is freed; skipped when the daemon never started */
 	sc_cache_free(c);
 	return rs;
 }
diff --git a/src/osdd.xml b/src/osdd.xml
new file mode 100644
index 0000000..9522910
--- /dev/null
+++ b/src/osdd.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+	 <!-- this file is a format string, so make sure it does not exceed ~4000 characters and it has percents escaped with
+	percent percent. -->
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" >
+ <ShortName>sear.c</ShortName>
+ <Description>sear.c</Description>
+ <Url type="text/html" method="get" template="http://%s/?q={searchTerms}" />
+</OpenSearchDescription>
diff --git a/src/structs.c b/src/structs.c
index fa4228c..b99f1eb 100644
--- a/src/structs.c
+++ b/src/structs.c
@@ -33,8 +33,7 @@ struct sc_result {
 	char * url; /* yesfree */
 	char * desc; /* yesfree */
 	char * title; /* yesfree */
-	time_t date; /* some search engines like to extract a date from a website, store that here */
-	char * html; /* yesfree - cached generated html output of said result or NULL before it's created */
+	time_t date; /* some search engines like to extract a date from a website, store that here - not implemented */
 	unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implementd */
 	unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */
 };
@@ -48,7 +47,6 @@ int sc_result_free (struct sc_result * r) {
 	free(r->url);
 	free(r->desc);
 	free(r->title);
-	free(r->html);
 	free(r);
 	return 1;
 }
@@ -58,7 +56,6 @@ struct sc_query {
 	char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */
 	time_t lookup_time; /* time of last lookup */
 	unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */
-	char * html; /* yesfree - cached generated HTML output of the result or NULL before it's created */
 };
 struct sc_query * sc_query_init () {
 	struct sc_query * q = calloc(1, sizeof(struct sc_query));
@@ -74,7 +71,6 @@ int sc_query_free (struct sc_query * q) {
 	if (!q)
 		return -1;
 	free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */
-	free(q->html); /* setting to NULL here is not necessary, as we'll never use this query struct again */
 	for (size_t i = 0; i < q->results_sizeof; i++)
 		sc_result_free(q->results[i]);
 	free(q);
diff --git a/src/url.c b/src/url.c
index a3a29e7..df93138 100644
--- a/src/url.c
+++ b/src/url.c
@@ -1,5 +1,5 @@
-int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) {
-	size_t written = 0;
+int urlencode (char * o, const char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) {
+	size_t written = 0; /* o CANNOT be equal to i, unlike in urldecode */
 	for (; *i; i++) {
 		if (isalnum(*i) || *i == '.' || *i == '_' || *i == '-' || *i == '~') {
 			o[written++] = *i;
@@ -8,10 +8,11 @@ int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of
 			written += 3;
 		}
 	}
+	o[written++] = '\0';
 	return 1;
 }
-int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memory allocated */) {
-	size_t written = 0;
+int urldecode (char * o, const char * i /* o must have at least strlen(i)+1 bytes memory allocated */) {
+	size_t written = 0; /* o can be equal to i for decoding in-place */
 	char buf[] = "00";
 	for (; *i; i++) {
 		if (*i == '%') {
@@ -26,5 +27,6 @@ int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memo
 			o[written++] = *i;
 		}
 	}
+	o[written++] = '\0';
 	return 1;
 }
author	sijanec <anton@sijanec.eu>	2021-04-03 23:15:48 +0200
committer	sijanec <anton@sijanec.eu>	2021-04-03 23:15:48 +0200
commit	bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4 (patch)
tree	f9960c7a43f7c0e1da6cb8e8656fcbda2129677a
parent	initial commit (diff)
download	sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.gz sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.bz2 sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.lz sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.xz sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.zst sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.zip