summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsijanec <anton@sijanec.eu>2021-04-03 23:15:48 +0200
committersijanec <anton@sijanec.eu>2021-04-03 23:15:48 +0200
commitbbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4 (patch)
treef9960c7a43f7c0e1da6cb8e8656fcbda2129677a
parentinitial commit (diff)
downloadsear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.gz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.bz2
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.lz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.xz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.zst
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.zip
-rw-r--r--Makefile7
-rw-r--r--README.md4
-rw-r--r--src/api.c103
-rw-r--r--src/hp.html49
-rw-r--r--src/httpd.c169
-rw-r--r--src/i18n.h18
-rw-r--r--src/lib.c104
-rw-r--r--src/log.c3
-rw-r--r--src/main.c26
-rw-r--r--src/osdd.xml8
-rw-r--r--src/structs.c6
-rw-r--r--src/url.c10
12 files changed, 453 insertions, 54 deletions
diff --git a/Makefile b/Makefile
index a5f5a31..5db1e09 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,9 @@ default:
mkdir tmp -p
xxd -i < src/hp.html > tmp/hp.xxd
echo ', 0' >> tmp/hp.xxd
- gcc -Wall -pedantic -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c
+ xxd -i < src/osdd.xml > tmp/osdd.xxd
+ echo ', 0' >> tmp/osdd.xxd
+ gcc -Wall -Wextra -pedantic -Wno-unused-parameter -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c
install:
mkdir -p $(DESTDIR)/usr/bin/
@@ -25,3 +27,6 @@ test-http:
test-http-valgrind:
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt tmp/nanohttp http://sijanec.eu/
+
+valgrind:
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./sear.c
diff --git a/README.md b/README.md
index b7bbb81..ad87bc8 100644
--- a/README.md
+++ b/README.md
@@ -2,12 +2,12 @@
**not implemented! check back again in a couple of days (:**
-sear.c is used as a lightweight replacement for [SearX](//en.wikipedia.org/wiki/Searx) that proxies and caches search results from
+sear.c is used as a lightweight replacement for [SearX](https://en.wikipedia.org/wiki/Searx) that proxies and caches search results from
the Google web search engine. The main advantages over SearX are speed and simplicity.
## instructions for debian and ubuntu systems
-First add my software distribution repository [prog.sijanec.eu](//prog.sijanec.eu) into your APT sources list.
+First add my software distribution repository [prog.sijanec.eu](https://prog.sijanec.eu) into your APT sources list.
```
apt install sear.c
diff --git a/src/api.c b/src/api.c
index ae8d619..6ad996e 100644
--- a/src/api.c
+++ b/src/api.c
@@ -107,7 +107,9 @@ char * sc_find_class (char * haystack, const char * definition) { /* you must fr
toreturn[endofclass-class] = '\0';
return toreturn;
}
-int sc_query_google (char * s, struct sc_cache * c) {
+struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q) { /* check for cached queries first! */
+ /* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */
+ /* if query is not NULL, it MUST be initialized */
/*
remarks:
* we are using wap.google.com over HTTP and with a user-agent string of a nokia mobile phone, so we get a lite website
@@ -117,7 +119,7 @@ int sc_query_google (char * s, struct sc_cache * c) {
+ A href points to a tracking relative link, starting with /url?q=. the q parameter contains the (obv urlencoded) link.
- result date: class has only one definition, {color:#70757a}, but same definition has the class for the settings A link.
+ extract those two classes and find the one that is only present on SPAN text elements.
- - result description: once we have the result div, the description is the // span with the appropriate class
+ - result description: once we have the result div, the description is the //table//span with the appropriate class
+ the appropriate class is the only one with {word-break:break-word}. note that this class also describes other elements.
- result div: to get the result div, we need the parent of the parent of the A link of the title.
* result dates are sometimes relative ("an hour ago") and heavily depend on the client location, based on IP.
@@ -129,35 +131,106 @@ int sc_query_google (char * s, struct sc_cache * c) {
based http request-response based user interface so we can ask the user to complete the captcha. this is not yet
implemeted and will be hard work.
*/
- if (!s || !c)
- return -1;
- int rs = 1;
+ int rs;
+ if (!s || !c) {
+ rs = -1;
+ goto rc;
+ }
+ int qwasgiven = 0;
+ if (!q)
+ q = sc_query_init();
+ else
+ qwasgiven++;
char * us = malloc(sizeof(char)*strlen(s)*3+1);
urlencode(us, s);
+ char * xpath = NULL;
+ char * descclass = NULL;
+ char * titleclass = NULL;
char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us);
// fprintf(stdout, "%s\n", txtdoc);
free(us);
if (!txtdoc) {
+ SC_LOG(SC_LOG_ERROR, c, "!txtdoc");
rs = -2;
goto rc;
}
- char * titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
- if (!titleclass) {
- SC_LOG(SC_LOG_ERROR, c, "!titleclass");
+ titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
+ descclass = sc_find_class(txtdoc, "{word-break:break-word}");
+ if (!titleclass || !descclass) {
+ SC_LOG(SC_LOG_ERROR, c, "!titleclass || !descclass");
rs = -3;
goto rc;
}
-#define SC_GTXF "/html/body//a[contains(@class, '%s')]" // @class='fuLhoc ZWRArf'"
- char * xpath = malloc(strlen(titleclass)+strlen(SC_GTXF));
+#define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* result a */
+#define SC_GTXD "../..//table//span[@class='%s']"
+#define SC_GTR q->results[q->results_length-1]
+ xpath = malloc(strlen(titleclass)+strlen(SC_GTXF));
sprintf(xpath, SC_GTXF, titleclass); /* whenever starts with titleclas */
- fprintf(stdout, "%s\n", xpath);
htmlDocPtr xmldoc = parseHtmlDocument(txtdoc, NULL);
- xmlXPathObjectPtr nodes = findNodes(xmldoc, xpath);
- eachNode(nodes, printLinkNode, NULL);
-rc:
+ if (qwasgiven) /* as you can see, when q is given, queries will be write-locked for the whole XML processing time! */
+ SC_CWLE(c, c->queries_lock);
+ q->results_length = 0;
+ gnu_code_start;
+ eachNodeX(xmldoc, xpath,
+ lambda(void, (xmlNodePtr node, void * data),
+ {
+ if (node->type == XML_ELEMENT_NODE) {
+ xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
+ if (href) {
+ char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href");
+ if (!strncmp(hreflink, "/url?q=", strlen("/url?q="))) {
+ hreflink = hreflink+strlen("/url?q=");
+ *strchrnul(hreflink, '&') = '\0';
+ urldecode(hreflink, hreflink);
+ }
+ char * x = malloc(strlen(descclass)+strlen(SC_GTXD));
+ sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */);
+ xmlNodePtr descnode = nthNodeXN(node, x, 0);
+ free(x);
+ if (q->results_sizeof <= q->results_length)
+ SC_BIGGER_ARRAY(q->results, sc_result);
+ q->results_length++;
+ SC_GTR->query = q;
+ SC_GTR->title = (char *) xmlNodeGetContent(node->children);
+ if (!SC_GTR->title) {
+ SC_GTR->title = malloc(strlen(SC_I18N_NO_TITLE)+1);
+ strcpy(SC_GTR->title, SC_I18N_NO_TITLE);
+ }
+ SC_GTR->url = hreflink;
+ if (!SC_GTR->url) {
+ SC_GTR->url = malloc(strlen(SC_I18N_NO_HREFLINK)+1);
+ strcpy(SC_GTR->url, SC_I18N_NO_HREFLINK);
+ }
+ SC_GTR->desc = (char *) xmlNodeGetContent(descnode);
+ if (!SC_GTR->desc) {
+ SC_GTR->desc = malloc(strlen(SC_I18N_NO_DESCRIPTION)+1);
+ strcpy(SC_GTR->desc, SC_I18N_NO_DESCRIPTION);
+ }
+ }
+ }
+ }
+ ),
+ NULL);
+ gnu_code_end;
+ q->cache = c;
+ q->lookup_time = time(NULL);
+ q->engines = SC_ENGINE_GOOGLE;
+ q->string = realloc(q->string, strlen(s)+1);
+ strcpy(q->string, s);
+ if (!qwasgiven) {
+ SC_CWLE(c, c->queries_lock);
+ if (c->queries_sizeof <= c->queries_length)
+ SC_BIGGER_ARRAY(c->queries, sc_query);
+ c->queries_length++;
+#define SC_GTQ c->queries[c->queries_length-1]
+ SC_GTQ = q;
+ }
+ SC_CUE(c, c->queries_lock);
xmlFreeDoc(xmldoc);
+rc:
free(txtdoc);
free(titleclass);
+ free(descclass);
free(xpath);
- return rs;
+ return (rs < 0) ? NULL : q;
}
diff --git a/src/hp.html b/src/hp.html
index 64da49d..d2bc82f 100644
--- a/src/hp.html
+++ b/src/hp.html
@@ -10,30 +10,49 @@
<link rel=stylesheet href=//sijanec.eu/assets/css/styles.css?ref=sear.c /> <!-- TODO: direktno vstavljanje v dokument -->
<link rel="shortcut icon" href="data:image/x-icon;," type="image/x-icon"> <!-- prevents favicon lookups -->
<link rel="icon" href="data:;base64,iVBORw0KGgo=">
+ <link rel="search" type="application/opensearchdescription+xml" href="/osdd.xml">
<style>
input[type=password], input[type=text], input[type=submit], input[type=button] {
- width: 100%%;
- height: 1,5cm;
- font-size: 18;
- }
- input .125 {
- width: 125%%;
- }
- input .50 {
- width: 50%%;
+ height: 1cm;
+ font-size: 18px;
}
.result:hover {
background: var(--bgc2);
}
+ .container {
+ display: flex;
+ flex-direction: row;
+ flex-wrap: nowrap;
+ justify-content: center;
+ align-items: stretch;
+ }
+ input[name=q] {
+ flex-grow: 4;
+ }
+ input[type=submit] {
+ flex-basis: 12.5%%;
+ }
+ .SC_LOG_ERROR {
+ color: red;
+ }
+ .SC_LOG_WARNING {
+ color: orange;
+ }
+ .SC_LOG_INFO {
+ color: lightgreen;
+ }
+ .SC_LOG_DEBUG {
+ color: magenta;
+ }
</style>
</head>
<body>
- <form>
- <input type=text name=q class=50 value="{{ query }}" placeholder="sear.c ..." />
- <input type=submit class=125 value=🔍 /> <!-- magnifying glass emoji -->
- <input type=submit class=125 name=f value=Ʊ /> <!-- horseshoe unicode character -->
- <input type=submit class=125 name=i value=🖼 /> <!-- framed picture emoji -->
- <input type=submit class=125 name=v value=🎬 /> <!-- that thing they use in movies to denote start of a scene emoji -->
+ <form class=container>
+ <input accesskey=4 type=text name=q value="%s" placeholder="sear.c ..." /> <!-- see www.standardaccesskeys.com -->
+ <input type=submit value=🔍 /> <!-- magnifying glass emoji -->
+ <input type=submit name=f value=Ʊ /> <!-- horseshoe unicode character -->
+ <input type=submit name=i value=🖼 hidden=hidden /> <!-- framed picture emoji - img search not implemented -->
+ <input type=submit name=v value=🎬 hidden=hidden /> <!-- that thing they use in movies - vid search N/I -->
</form>
<h3>
%s
diff --git a/src/httpd.c b/src/httpd.c
new file mode 100644
index 0000000..bf5c3d1
--- /dev/null
+++ b/src/httpd.c
@@ -0,0 +1,169 @@
+char * sc_queryhtml (struct sc_query * q) { /* renders the results of q into the sc_hp page template and returns the page as a malloc'd string */ /* remember to free returned string in the caller */ /* caller takes care of locking */
+ size_t resultshtml_written = 0; /* bytes of result HTML produced so far, not counting the terminating NUL */
+ size_t resultshtml_sizeof = SC_ALLOC_CHUNK; /* currently allocated size of resultshtml */
+ char * resultshtml = malloc(resultshtml_sizeof); /* NOTE(review): malloc/realloc results are not checked anywhere in this function */
+ resultshtml[0] = '\0'; /* keeps the string valid when q has no results */
+ for (size_t i = 0; i < q->results_length; i++) {
+#define SC_HRC(string, wanted) /* grow string when string##_written+wanted would reach string##_sizeof */ \
+ if (string##_written+wanted >= string##_sizeof) { \
+ string##_sizeof = (string##_written+wanted+1)*SC_REALLOC_K; \
+ string = realloc(string, string##_sizeof); \
+ }
+#define SC_HRF "<div class=result><h4><a href=\"%s\">%s</a></h4><p>%s</p></div>" /* one result: url, title, description */
+ char * safetitle = htmlspecialchars(q->results[i]->title); /* escaped copies, freed at the end of each iteration */
+ char * safebody = htmlspecialchars(q->results[i]->desc);
+ char * safeurl = htmlspecialchars(q->results[i]->url);
+ size_t ws = snprintf(NULL, 0, SC_HRF, safeurl, safetitle, safebody); /* dry run: length the formatted result needs */
+ SC_HRC(resultshtml, ws); /* make room before the real sprintf below */
+ resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, safeurl, safetitle, safebody); /* append at the current end */
+ free(safetitle);
+ free(safebody);
+ free(safeurl);
+ }
+#define SC_HRS SC_I18N_NUMBER_OF_RESULTS ": %ld | " SC_I18N_QUERY_TIME ": %s" /* NOTE(review): %ld receives q->results_length, which is compared with a size_t above; %zu may be the matching specifier - confirm against struct sc_query */
+ char formatted_time[128];
+ struct tm tm;
+ localtime_r(&q->lookup_time, &tm); /* lookup time of the query, broken down in local time */
+ strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm);
+ char queryinfo[256]; /* summary line: number of results and formatted lookup time */
+ snprintf(queryinfo, 256, SC_HRS, q->results_length, formatted_time);
+ char * safequery = htmlspecialchars(q->string); /* passed twice to the sc_hp template below */
+ char * response = malloc(strlen((char *) sc_hp)+2*strlen(safequery)+strlen(queryinfo)+strlen(resultshtml)); /* NOTE(review): no explicit +1 for the NUL; presumably the conversion specifiers consumed from sc_hp leave enough slack - confirm */
+ sprintf(response, (char *) sc_hp, safequery, safequery, queryinfo, resultshtml); /* sc_hp is used as a format string taking four strings */
+ free(safequery);
+ free(resultshtml);
+ return response; /* ownership passes to the caller */
+}
+char * sc_logshtml (struct sc_cache * c) { /* renders every log entry of c as HTML and returns a malloc'd string, or NULL when c->logentries is NULL */ /* remember to free on caller, remember not to report errors here whilst locked */
+ char * html = malloc(SC_ALLOC_CHUNK); /* NOTE(review): malloc/realloc results are not checked in this function */
+ html[0] = '\0'; /* keeps the string valid when there are no log entries */
+ size_t html_written = 0; /* bytes of HTML produced so far, not counting the terminating NUL */
+ size_t html_sizeof = 0; /* NOTE(review): starts at 0 although html already holds SC_ALLOC_CHUNK bytes, so the first SC_HRC below always reallocs */
+ pthread_rwlock_rdlock(c->logentries_lock); /* held while the entries are read */
+ if (!c->logentries) {
+ free(html);
+ return NULL; /* NOTE(review): returns while logentries_lock is still read-locked; the unlock at the end of the function is not reached */
+ }
+ for (size_t i = 0; i < c->logentries_length; i++) {
+#define SC_HLF "<div class=result id=log%lu>[<span class=%s>%s</span>] %s " /* format of one log line; NOTE(review): %lu receives the size_t i - confirm the specifier */ \
+ "<a href=\"" SC_I18N_GIT_URL "/src/branch/master/%s#L%lu\">%s()@%s:%lu</a>: %s</div>"
+#define SC_HLA i, /* argument list matching SC_HLF, shared by the snprintf and sprintf below */ \
+ sc_log_str(c->logentries[i]->type), \
+ sc_log_str(c->logentries[i]->type), \
+ formatted_time, \
+ c->logentries[i]->file, \
+ c->logentries[i]->line, \
+ c->logentries[i]->function, /* compile-time burned in values are safe from xss :) */ \
+ c->logentries[i]->file, \
+ c->logentries[i]->line, \
+ safemessage /* ... whereas this might contain < */
+ struct tm tm;
+ char formatted_time[128];
+ localtime_r(&c->logentries[i]->time, &tm); /* time of the entry, broken down in local time */
+ strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm);
+ char * safemessage = htmlspecialchars(c->logentries[i]->message); /* escaped copy, freed at the end of the iteration */
+ size_t ws = snprintf(NULL, 0, SC_HLF, SC_HLA); /* dry run: length the formatted entry needs */
+ SC_HRC(html, ws); /* grows html via the macro defined in sc_queryhtml */
+ html_written += sprintf(html+html_written, SC_HLF, SC_HLA); /* append at the current end */
+ free(safemessage);
+ }
+ pthread_rwlock_unlock(c->logentries_lock);
+ return html; /* ownership passes to the caller */
+}
+int sc_httpd (void * cls, /* request handler handed to MHD_start_daemon in main(); cls is the struct sc_cache passed there */
+ struct MHD_Connection * connection,
+ const char * url,
+ const char * method,
+ const char * version,
+ const char * upload_data,
+ size_t * upload_data_size,
+ void ** ptr) {
+ struct sc_cache * c = (struct sc_cache *) cls;
+ static int dummy; /* its address marks a connection whose first callback round has already happened */
+ struct MHD_Response * httpd_response;
+ int ret;
+ if (0 != strcmp(method, "GET"))
+ return MHD_NO; /* unexpected method */
+ if (&dummy != *ptr) {
+ /* the first time only the headers are valid, do not respond in the first round ... */
+ *ptr = &dummy;
+ return MHD_YES;
+ }
+ if (0 != *upload_data_size)
+ return MHD_NO; /* upload data in a GET?! */
+ *ptr = NULL; /* clear context pointer */
+ char * response = NULL; /* body to send; chosen below */
+ enum MHD_ResponseMemoryMode mhdrmm = MHD_RESPMEM_MUST_FREE; /* default for malloc'd bodies; switched to PERSISTENT for the static texts */
+ const char * query = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "q"); /* search string, NULL when the q argument is absent */
+ const char * host = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Host");
+ char * location = "//git.sijanec.eu/sijanec/sear.c"; /* only sent for 3xx statuses; the one place that sets such a status also overwrites this */
+ char * content_type = "text/html";
+ int status_code = MHD_HTTP_OK;
+ if (!host)
+ host = ""; /* so that host can be formatted and measured below */
+ struct sc_query * q = NULL;
+ if (!query) { /* no search requested: route on the first character after the leading slash */
+ if (url[0] == '/')
+ switch (url[1]) {
+ case 's': /* security.txt */
+ case '.': /* .well-known/security.txt */
+ mhdrmm = MHD_RESPMEM_PERSISTENT;
+ response = sc_securitytxt;
+ content_type = "text/plain";
+ break;
+ case 'r': /* robots.txt */
+ mhdrmm = MHD_RESPMEM_PERSISTENT;
+ response = sc_robotstxt;
+ content_type = "text/plain";
+ break;
+ case 'o': /* osdd.xml - opensearch description document */
+ response = malloc(strlen(sc_osdd)+strlen(host)); /* NOTE(review): no explicit +1; presumably the two characters of the replaced %s cover the NUL - confirm */
+ sprintf(response, sc_osdd, host); /* NOTE(review): the client-supplied Host header is inserted into the XML without escaping */
+ content_type = "application/opensearchdescription+xml";
+ break;
+ case 'l': /* logs.html */
+ {
+ char * logshtml = sc_logshtml(c); /* NULL on failure; SC_I18N_LOGS_ERROR is shown instead */
+ response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_LOGS)+strlen(logshtml ? logshtml : SC_I18N_LOGS_ERROR));
+ sprintf(response, (char *) sc_hp, "", "", SC_I18N_LOGS, logshtml ? logshtml : SC_I18N_LOGS_ERROR);
+ free(logshtml);
+ }
+ break;
+ }
+ if (!response) { /* nothing matched above: serve the home page */
+ response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_HP_HEADING)+strlen(SC_I18N_HP_BODY));
+ sprintf(response, (char *) sc_hp, "", "", SC_I18N_HP_HEADING, SC_I18N_HP_BODY);
+ }
+ } else {
+ int already_retried = 0; /* allows exactly one pass through sc_query_google before giving up */
+retry:
+ SC_CRLE(c, c->queries_lock); /* presumably takes the read lock on the query cache - macro not shown here */
+ for (size_t i = 0; i < c->queries_length; i++) /* linear scan without break: the last matching entry is kept */
+ if (!strcmp(c->queries[i]->string, query))
+ q = c->queries[i];
+ if (q) {
+ response = sc_queryhtml(q); /* MHD_create_response_from_buffer will free response (; */
+ if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f") && q->results_length > 0) { /* f argument present: redirect to the first result */
+ status_code = 307;
+ location = q->results[0]->url; /* NOTE(review): points into the cached result and is read after the lock is released below */
+ }
+ SC_CUE(c, c->queries_lock);
+ } else {
+ SC_CUE(c, c->queries_lock);
+ sc_query_google(query, c, NULL); /* return value is ignored; success is detected by finding the query on the retry */
+ if (already_retried++) { /* second miss: answer with the error page */
+ char * safequery = htmlspecialchars(query);
+ response = malloc(strlen((char*) sc_hp)+strlen(safequery)*2+strlen(SC_I18N_HP_ERROR_HEADING)+strlen(SC_I18N_HP_ERROR_BODY));
+ sprintf(response, (char *) sc_hp, safequery, safequery, SC_I18N_HP_ERROR_HEADING, SC_I18N_HP_ERROR_BODY);
+ free(safequery);
+ } else goto retry;
+ }
+ }
+ httpd_response = MHD_create_response_from_buffer (strlen(response), (void *) response, mhdrmm);
+ MHD_add_response_header(httpd_response, "Content-Type", content_type);
+ if (status_code >= 300 && status_code <= 399)
+ MHD_add_response_header(httpd_response, "Location", location);
+ ret = MHD_queue_response(connection, status_code, httpd_response);
+ MHD_destroy_response(httpd_response);
+ return ret;
+}
diff --git a/src/i18n.h b/src/i18n.h
index e69de29..7335100 100644
--- a/src/i18n.h
+++ b/src/i18n.h
@@ -0,0 +1,18 @@
+#define SC_I18N_NO_TITLE "ni naslova"
+#define SC_I18N_NO_HREFLINK "/? ni hiperpovezave"
+#define SC_I18N_NO_DESCRIPTION "ni opisa"
+#define SC_I18N_HP_HEADING "dobrodošli na prvo stran <code>sear.c</code>"
+#define SC_I18N_HP_BODY "<code>sear.c</code> je program za anonimizacijo in predpomnenje rezultatov spletnih iskalnikov. " \
+ "Za uporabo nekaj vnesite v iskalno vrstico zgoraj in pritisnite gumb za iskanje."
+#define SC_I18N_NUMBER_OF_RESULTS "število zadetkov"
+#define SC_I18N_QUERY_TIME "čas poizvedbe"
+#define SC_I18N_DATETIME_FORMAT "%c"
+#define SC_I18N_UNLOCKING "odklepanje"
+#define SC_I18N_LOCKING "zaklepanje"
+#define SC_I18N_FAILED "ni uspelo"
+#define SC_I18N_HP_ERROR_HEADING "napaka!"
+#define SC_I18N_HP_ERROR_BODY "Pri pridobivanju rezultatov je api klic odvrnil s kodo, ki označuje neuspelo stanje. " \
+ "Preberite <a href=/logs.html>dnevniške zapise</a>."
+#define SC_I18N_LOGS "dnevniški zapisi"
+#define SC_I18N_LOGS_ERROR "napaka pri branju dnevniških datotek"
+#define SC_I18N_GIT_URL "//git.sijanec.eu/sijanec/sear.c"
diff --git a/src/lib.c b/src/lib.c
index 2c3e34a..5c0576e 100644
--- a/src/lib.c
+++ b/src/lib.c
@@ -1,4 +1,4 @@
-static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
+htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
if (!b)
b = "";
htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
@@ -6,28 +6,112 @@ static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url *
htmlFreeParserCtxt(parser_context);
return document;
}
-static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
+xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
+ if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
+ xmlXPathFreeContext(xpath_ctx);
+ xmlXPathFreeObject(nodes);
+ return NULL;
+ }
+ xmlXPathFreeContext(xpath_ctx);
+ return nodes;
+}
+xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
+ xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
+ xmlXPathSetContextNode(node, xpath_ctx);
+ xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
+ if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
+ xmlXPathFreeContext(xpath_ctx);
+ xmlXPathFreeObject(nodes);
+ return NULL;
+ }
xmlXPathFreeContext(xpath_ctx);
return nodes;
}
-typedef void (*node_function_t)(xmlNodePtr node, void * data);
-static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+typedef void (*node_function_t) (xmlNodePtr node, void * data);
+void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* you can instead use EACHNODE macro */
xmlNodeSetPtr nodeset = nodes->nodesetval;
int i, size = nodeset->nodeNr;
for (i = 0; i < size; i++) {
xmlNodePtr cur;
- cur = (xmlNodePtr)nodeset->nodeTab[i];
+ cur = (xmlNodePtr) nodeset->nodeTab[i];
f(cur, data);
}
}
-void printLinkNode(xmlNodePtr node, void * data) {
+void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) {
+ xmlXPathObjectPtr nodes = findNodes(doc, xpath);
+ if (!nodes)
+ return;
+ eachNode(nodes, f, data);
+ xmlXPathFreeObject(nodes);
+}
+xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) {
+ xmlXPathObjectPtr nodes = findNodesN(node, xpath);
+ if (!nodes)
+ return NULL;
+ xmlNodeSetPtr nodeset = nodes->nodesetval;
+ int size = nodeset->nodeNr;
+ if (size <= n)
+ return NULL;
+ xmlNodePtr toreturn = (xmlNodePtr) nodeset->nodeTab[n];
+ xmlXPathFreeObject(nodes);
+ return toreturn;
+}
+#define EACHNODE(node, nodes) /* you can instead use eachNodeX with anonymous function - no need to free and findnodes separately */ \
+ for (int EACHNODE_i = 0; \
+ nodes ? nodes->nodesetval ? \
+ ((EACHNODE_i < nodes->nodesetval->nodeNr) && (node = (xmlNodePtr)nodes->nodesetval->nodeTab[EACHNODE_i])) \
+ : 0 : 0; \
+ EACHNODE_i++)
+/* // this does not work
+#define EACHNODEX(node, target, xpath) \
+ xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
+ for (size_t EACHNODEX_i = 0; \
+ EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
+ ? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
+ && (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
+ : xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
+ : 0 : 0; \
+ EACHNODEX_i++)
+*/
+void printNode (xmlNodePtr node, void * data) {
+ if (data){}
if (node->type == XML_ELEMENT_NODE) {
- xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
- if (href) {
- printf("-> Link to '%s'\n", xmlGetProp(node, BAD_CAST "href"));
+ printf("-> content: '%s'\n", (char *) xmlNodeGetContent(node));
+ }
+}
+#define gnu_code_start \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wpedantic\"")
+#define gnu_code_end \
+ _Pragma ("GCC diagnostic pop")
+/* this is the definition of the anonymous function - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC */
+#define lambda(l_ret_type, l_arguments, l_body) \
+ ({ \
+ l_ret_type l_anonymous_functions_name l_arguments \
+ l_body \
+ &l_anonymous_functions_name; \
+ })
+char * htmlspecialchars (const char * i) { /* remember to free the output */
+ size_t s = 128;
+ char * o = malloc(s);
+ size_t w = 0;
+ for (; *i; i++) {
+ if (s - w <= 10)
+ o = realloc(o, (s *= 1.5));
+ switch (*i) {
+ case '<':
+ w += sprintf(o+w, "&lt;");
+ break;
+ case '"':
+ w += sprintf(o+w, "&quot;");
+ break;
+ default:
+ o[w++] = *i;
+ break;
}
}
+ o[w++] = '\0';
+ return o;
}
-
diff --git a/src/log.c b/src/log.c
index d229512..5e4dc16 100644
--- a/src/log.c
+++ b/src/log.c
@@ -31,7 +31,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f
return -2;
if (pthread_rwlock_wrlock(lock))
return -3;
- if (c->logentries_sizeof - c->logentries_length != 0)
+ if (c->logentries_sizeof <= c->logentries_length)
SC_BIGGER_ARRAY(c->logentries, sc_logentry);
c->logentries_length++;
size_t strlenm = strlen(m);
@@ -53,6 +53,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f
SC_PLL->line = l;
SC_PLL->function = ca;
SC_PLL->time = time(NULL);
+ SC_PLL->type = t;
fprintf(stderr, "[sear.c] %s %s()@%s:%lu: %s\n", sc_log_str(t), ca, f, l, SC_PLL->message); /* in posix, this is thread safe */
if (lock && pthread_rwlock_unlock(lock))
return -4;
diff --git a/src/main.c b/src/main.c
index c42d5c0..3e8c544 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,3 +1,4 @@
+#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -16,11 +17,25 @@
#include <libxml/HTMLtree.h>
#include <libxml/tree.h>
#include <libxml/xpath.h>
+#include <microhttpd.h>
#include <lib.c>
#include <url.c>
unsigned char sc_hp[] = { /* html page null terminated format string, from file src/hp.html */
#include <hp.xxd>
};
+char sc_osdd[] = { /* xml document for opensearch */
+#include <osdd.xxd>
+};
+char sc_robotstxt[] = "User-Agent: *\nDisallow: /\n";
+char sc_securitytxt[] = "# This content information is provided by the developer of this opensource application.\n"
+ "# The developer is not responsible for the actions of his software\n"
+ "# This website IS NOT operated by the developer. Do not use the contact information below in hopes of contacting the webmaster\n"
+ "# The following contact information is provided for reporting security bugs regarding the software, not for legal issues\n"
+ "Contact: https://www.sijanec.eu/o.html#kontakt\n"
+ "Acknowledgments: https://git.sijanec.eu/sijanec/sear.c\n"
+ "Encryption: https://www.sijanec.eu/pgp-key.txt\n"
+ "Expires: Thu, 31 Dec 2021 18:37:07 -0800\n"
+ "Preferred-Languages: sl, en, de, hr\n";
#define SC_HTTP_PORT 7327 /* SEAR on mobile keyboard */
#define SC_HTTP_RBUFSIZE 4096 /* initial size of http read buffer, increasning by K */
#define SC_HTTP_USER_AGENT "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" /* so google and others sends a minimal response */
@@ -28,16 +43,25 @@ unsigned char sc_hp[] = { /* html page null terminated format string, from file
#include <structs.c>
#include <log.c>
#include <api.c>
+#include <httpd.c>
/* this is new in my programs. I am now using _sizeof for the actual alloced size of the array and _length for the count of elements in array. this is done to decrease number of calls to realloc&amis */
int main (int argc, char ** argv) {
int rs = 0;
struct sc_cache * c = sc_cache_init();
+ struct MHD_Daemon * d;
if (!c) {
rs = 1;
goto rc;
}
- sc_query_google("slovenia", c);
+ d = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION, SC_HTTP_PORT, NULL, NULL, &sc_httpd, c, MHD_OPTION_END);
+ if (!d) {
+ rs = 2;
+ goto rc;
+ }
+ /* sc_query_google(argv[1], c); */
+ getc(stdin);
rc:
sc_cache_free(c);
+ MHD_stop_daemon(d);
return rs;
}
diff --git a/src/osdd.xml b/src/osdd.xml
new file mode 100644
index 0000000..9522910
--- /dev/null
+++ b/src/osdd.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+ <!-- this file is a format string, so make sure it does not exceed ~4000 characters and it has percents escaped with
+ percent percent. -->
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" >
+ <ShortName>sear.c</ShortName>
+ <Description>sear.c</Description>
+ <Url type="text/html" method="get" template="http://%s/?q={searchTerms}" />
+</OpenSearchDescription>
diff --git a/src/structs.c b/src/structs.c
index fa4228c..b99f1eb 100644
--- a/src/structs.c
+++ b/src/structs.c
@@ -33,8 +33,7 @@ struct sc_result {
char * url; /* yesfree */
char * desc; /* yesfree */
char * title; /* yesfree */
- time_t date; /* some search engines like to extract a date from a website, store that here */
- char * html; /* yesfree - cached generated html output of said result or NULL before it's created */
+ time_t date; /* some search engines like to extract a date from a website, store that here - not implemented */
unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implementd */
unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */
};
@@ -48,7 +47,6 @@ int sc_result_free (struct sc_result * r) {
free(r->url);
free(r->desc);
free(r->title);
- free(r->html);
free(r);
return 1;
}
@@ -58,7 +56,6 @@ struct sc_query {
char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */
time_t lookup_time; /* time of last lookup */
unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */
- char * html; /* yesfree - cached generated HTML output of the result or NULL before it's created */
};
struct sc_query * sc_query_init () {
struct sc_query * q = calloc(1, sizeof(struct sc_query));
@@ -74,7 +71,6 @@ int sc_query_free (struct sc_query * q) {
if (!q)
return -1;
free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */
- free(q->html); /* setting to NULL here is not necessary, as we'll never use this query struct again */
for (size_t i = 0; i < q->results_sizeof; i++)
sc_result_free(q->results[i]);
free(q);
diff --git a/src/url.c b/src/url.c
index a3a29e7..df93138 100644
--- a/src/url.c
+++ b/src/url.c
@@ -1,5 +1,5 @@
-int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) {
- size_t written = 0;
+int urlencode (char * o, const char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) {
+ size_t written = 0; /* o CANNOT be equal to i, unlike in urldecode */
for (; *i; i++) {
if (isalnum(*i) || *i == '.' || *i == '_' || *i == '-' || *i == '~') {
o[written++] = *i;
@@ -8,10 +8,11 @@ int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of
written += 3;
}
}
+ o[written++] = '\0';
return 1;
}
-int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memory allocated */) {
- size_t written = 0;
+int urldecode (char * o, const char * i /* o must have at least strlen(i)+1 bytes memory allocated */) {
+ size_t written = 0; /* o can be equal to i for decoding in-place */
char buf[] = "00";
for (; *i; i++) {
if (*i == '%') {
@@ -26,5 +27,6 @@ int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memo
o[written++] = *i;
}
}
+ o[written++] = '\0';
return 1;
}