diff options
author | sijanec <anton@sijanec.eu> | 2021-04-03 23:15:48 +0200 |
---|---|---|
committer | sijanec <anton@sijanec.eu> | 2021-04-03 23:15:48 +0200 |
commit | bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4 (patch) | |
tree | f9960c7a43f7c0e1da6cb8e8656fcbda2129677a | |
parent | initial commit (diff) | |
download | sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.gz sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.bz2 sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.lz sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.xz sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.zst sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.zip |
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | src/api.c | 103 | ||||
-rw-r--r-- | src/hp.html | 49 | ||||
-rw-r--r-- | src/httpd.c | 169 | ||||
-rw-r--r-- | src/i18n.h | 18 | ||||
-rw-r--r-- | src/lib.c | 104 | ||||
-rw-r--r-- | src/log.c | 3 | ||||
-rw-r--r-- | src/main.c | 26 | ||||
-rw-r--r-- | src/osdd.xml | 8 | ||||
-rw-r--r-- | src/structs.c | 6 | ||||
-rw-r--r-- | src/url.c | 10 |
12 files changed, 453 insertions, 54 deletions
@@ -4,7 +4,9 @@ default: mkdir tmp -p xxd -i < src/hp.html > tmp/hp.xxd echo ', 0' >> tmp/hp.xxd - gcc -Wall -pedantic -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c + xxd -i < src/osdd.xml > tmp/osdd.xxd + echo ', 0' >> tmp/osdd.xxd + gcc -Wall -Wextra -pedantic -Wno-unused-parameter -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c install: mkdir -p $(DESTDIR)/usr/bin/ @@ -25,3 +27,6 @@ test-http: test-http-valgrind: valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt tmp/nanohttp http://sijanec.eu/ + +valgrind: + valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt ./sear.c @@ -2,12 +2,12 @@ **not implemented! check back again in a couple of days (:** -sear.c is used as a lightweight replacement for [SearX](//en.wikipedia.org/wiki/Searx) that proxies and caches search results from +sear.c is used as a lightweight replacement for [SearX](https://en.wikipedia.org/wiki/Searx) that proxies and caches search results from the Google web search engine. The main advantages over SearX are speed and simplicity. ## instructions for debian and ubuntu systems -First add my software distribution repository [prog.sijanec.eu](//prog.sijanec.eu) into your APT sources list. +First add my software distribution repository [prog.sijanec.eu](https://prog.sijanec.eu) into your APT sources list. ``` apt install sear.c @@ -107,7 +107,9 @@ char * sc_find_class (char * haystack, const char * definition) { /* you must fr toreturn[endofclass-class] = '\0'; return toreturn; } -int sc_query_google (char * s, struct sc_cache * c) { +struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q) { /* check for cached queries first! */ + /* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */ + /* if query is not NULL, it MUST be initialized */ /* remarks: * we are using wap.google.com over HTTP and with a user-agent string of a nokia mobile phone, so we get a lite website @@ -117,7 +119,7 @@ int sc_query_google (char * s, struct sc_cache * c) { + A href points to a tracking relative link, starting with /url?q=. the q parameter contains the (obv urlencoded) link. - result date: class has only one definition, {color:#70757a}, but same definition has the class for the settings A link. + extract those two classes and find the one that is only present on SPAN text elements. - - result description: once we have the result div, the description is the // span with the appropriate class + - result description: once we have the result div, the description is the //table//span with the appropriate class + the appropriate class is the only one with {word-break:break-word}. note that this class also describes other elements. - result div: to get the result div, we need the parent of the parent of the A link of the title. * result dates are sometimes relative ("an hour ago") and heavily depend on the client location, based on IP. @@ -129,35 +131,106 @@ int sc_query_google (char * s, struct sc_cache * c) { based http request-response based user interface so we can ask the user to complete the captcha. this is not yet implemeted and will be hard work. */ - if (!s || !c) - return -1; - int rs = 1; + int rs; + if (!s || !c) { + rs = -1; + goto rc; + } + int qwasgiven = 0; + if (!q) + q = sc_query_init(); + else + qwasgiven++; char * us = malloc(sizeof(char)*strlen(s)*3+1); urlencode(us, s); + char * xpath = NULL; + char * descclass = NULL; + char * titleclass = NULL; char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us); // fprintf(stdout, "%s\n", txtdoc); free(us); if (!txtdoc) { + SC_LOG(SC_LOG_ERROR, c, "!txtdoc"); rs = -2; goto rc; } - char * titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}"); - if (!titleclass) { - SC_LOG(SC_LOG_ERROR, c, "!titleclass"); + titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}"); + descclass = sc_find_class(txtdoc, "{word-break:break-word}"); + if (!titleclass || !descclass) { + SC_LOG(SC_LOG_ERROR, c, "!titleclass || !descclass"); rs = -3; goto rc; } -#define SC_GTXF "/html/body//a[contains(@class, '%s')]" // @class='fuLhoc ZWRArf'" - char * xpath = malloc(strlen(titleclass)+strlen(SC_GTXF)); +#define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* result a */ +#define SC_GTXD "../..//table//span[@class='%s']" +#define SC_GTR q->results[q->results_length-1] + xpath = malloc(strlen(titleclass)+strlen(SC_GTXF)); sprintf(xpath, SC_GTXF, titleclass); /* whenever starts with titleclas */ - fprintf(stdout, "%s\n", xpath); htmlDocPtr xmldoc = parseHtmlDocument(txtdoc, NULL); - xmlXPathObjectPtr nodes = findNodes(xmldoc, xpath); - eachNode(nodes, printLinkNode, NULL); -rc: + if (qwasgiven) /* as you can see, when q is given, queries will be write-locked for the whole XML processing time! */ + SC_CWLE(c, c->queries_lock); + q->results_length = 0; + gnu_code_start; + eachNodeX(xmldoc, xpath, + lambda(void, (xmlNodePtr node, void * data), + { + if (node->type == XML_ELEMENT_NODE) { + xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href"); + if (href) { + char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href"); + if (!strncmp(hreflink, "/url?q=", strlen("/url?q="))) { + hreflink = hreflink+strlen("/url?q="); + *strchrnul(hreflink, '&') = '\0'; + urldecode(hreflink, hreflink); + } + char * x = malloc(strlen(descclass)+strlen(SC_GTXD)); + sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */); + xmlNodePtr descnode = nthNodeXN(node, x, 0); + free(x); + if (q->results_sizeof <= q->results_length) + SC_BIGGER_ARRAY(q->results, sc_result); + q->results_length++; + SC_GTR->query = q; + SC_GTR->title = (char *) xmlNodeGetContent(node->children); + if (!SC_GTR->title) { + SC_GTR->title = malloc(strlen(SC_I18N_NO_TITLE)+1); + strcpy(SC_GTR->title, SC_I18N_NO_TITLE); + } + SC_GTR->url = hreflink; + if (!SC_GTR->url) { + SC_GTR->url = malloc(strlen(SC_I18N_NO_HREFLINK)+1); + strcpy(SC_GTR->url, SC_I18N_NO_HREFLINK); + } + SC_GTR->desc = (char *) xmlNodeGetContent(descnode); + if (!SC_GTR->desc) { + SC_GTR->desc = malloc(strlen(SC_I18N_NO_DESCRIPTION)+1); + strcpy(SC_GTR->desc, SC_I18N_NO_DESCRIPTION); + } + } + } + } + ), + NULL); + gnu_code_end; + q->cache = c; + q->lookup_time = time(NULL); + q->engines = SC_ENGINE_GOOGLE; + q->string = realloc(q->string, strlen(s)+1); + strcpy(q->string, s); + if (!qwasgiven) { + SC_CWLE(c, c->queries_lock); + if (c->queries_sizeof <= c->queries_length) + SC_BIGGER_ARRAY(c->queries, sc_query); + c->queries_length++; +#define SC_GTQ c->queries[c->queries_length-1] + SC_GTQ = q; + } + SC_CUE(c, c->queries_lock); xmlFreeDoc(xmldoc); +rc: free(txtdoc); free(titleclass); + free(descclass); free(xpath); - return rs; + return (rs < 0) ? NULL : q; } diff --git a/src/hp.html b/src/hp.html index 64da49d..d2bc82f 100644 --- a/src/hp.html +++ b/src/hp.html @@ -10,30 +10,49 @@ <link rel=stylesheet href=//sijanec.eu/assets/css/styles.css?ref=sear.c /> <!-- TODO: direktno vstavljanje v dokument --> <link rel="shortcut icon" href="data:image/x-icon;," type="image/x-icon"> <!-- prevents favicon lookups --> <link rel="icon" href="data:;base64,iVBORw0KGgo="> + <link rel=search" type="application/opensearchdescription+xml" href="/osdd.xml"> <style> input[type=password], input[type=text], input[type=submit], input[type=button] { - width: 100%%; - height: 1,5cm; - font-size: 18; - } - input .125 { - width: 125%%; - } - input .50 { - width: 50%%; + height: 1cm; + font-size: 18px; } .result:hover { background: var(--bgc2); } + .container { + display: flex; + flex-direction: row; + flex-wrap: nowrap; + justify-content: center; + align-items: stretch; + } + input[name=q] { + flex-grow: 4; + } + input[type=submit] { + flex-basis: 12.5%%; + } + .SC_LOG_ERROR { + color: red; + } + .SC_LOG_WARNING { + color: orange; + } + .SC_LOG_INFO { + color: lightgreen; + } + .SC_LOG_DEBUG { + color: magenta; + } </style> </head> <body> - <form> - <input type=text name=q class=50 value="{{ query }}" placeholder="sear.c ..." /> - <input type=submit class=125 value=🔍 /> <!-- magnifying glass emoji --> - <input type=submit class=125 name=f value=Ʊ /> <!-- horseshoe unicode character --> - <input type=submit class=125 name=i value=🖼 /> <!-- framed picture emoji --> - <input type=submit class=125 name=v value=🎬 /> <!-- that thing they use in movies to denote start of a scene emoji --> + <form class=container> + <input accesskey=4 type=text name=q value="%s" placeholder="sear.c ..." /> <!-- see www.standardaccesskeys.com --> + <input type=submit value=🔍 /> <!-- magnifying glass emoji --> + <input type=submit name=f value=Ʊ /> <!-- horseshoe unicode character --> + <input type=submit name=i value=🖼 hidden=hidden /> <!-- framed picture emoji - img search not implemented --> + <input type=submit name=v value=🎬 hidden=hidden /> <!-- that thing they use in movies - vid search N/I --> </form> <h3> %s diff --git a/src/httpd.c b/src/httpd.c new file mode 100644 index 0000000..bf5c3d1 --- /dev/null +++ b/src/httpd.c @@ -0,0 +1,169 @@ +char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string in the caller */ /* caller takes care of locking */ + size_t resultshtml_written = 0; + size_t resultshtml_sizeof = SC_ALLOC_CHUNK; + char * resultshtml = malloc(resultshtml_sizeof); + resultshtml[0] = '\0'; + for (size_t i = 0; i < q->results_length; i++) { +#define SC_HRC(string, wanted) \ + if (string##_written+wanted >= string##_sizeof) { \ + string##_sizeof = (string##_written+wanted+1)*SC_REALLOC_K; \ + string = realloc(string, string##_sizeof); \ + } +#define SC_HRF "<div class=result><h4><a href=\"%s\">%s</a></h4><p>%s</p></div>" + char * safetitle = htmlspecialchars(q->results[i]->title); + char * safebody = htmlspecialchars(q->results[i]->desc); + char * safeurl = htmlspecialchars(q->results[i]->url); + size_t ws = snprintf(NULL, 0, SC_HRF, safeurl, safetitle, safebody); + SC_HRC(resultshtml, ws); + resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, safeurl, safetitle, safebody); + free(safetitle); + free(safebody); + free(safeurl); + } +#define SC_HRS SC_I18N_NUMBER_OF_RESULTS ": %ld | " SC_I18N_QUERY_TIME ": %s" + char formatted_time[128]; + struct tm tm; + localtime_r(&q->lookup_time, &tm); + strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm); + char queryinfo[256]; + snprintf(queryinfo, 256, SC_HRS, q->results_length, formatted_time); + char * safequery = htmlspecialchars(q->string); + char * response = malloc(strlen((char *) sc_hp)+2*strlen(safequery)+strlen(queryinfo)+strlen(resultshtml)); + sprintf(response, (char *) sc_hp, safequery, safequery, queryinfo, resultshtml); + free(safequery); + free(resultshtml); + return response; +} +char * sc_logshtml (struct sc_cache * c) { /* remember to free on caller, remember not to report errors here whilst locked */ + char * html = malloc(SC_ALLOC_CHUNK); + html[0] = '\0'; + size_t html_written = 0; + size_t html_sizeof = 0; + pthread_rwlock_rdlock(c->logentries_lock); + if (!c->logentries) { + free(html); + return NULL; + } + for (size_t i = 0; i < c->logentries_length; i++) { +#define SC_HLF "<div class=result id=log%lu>[<span class=%s>%s</span>] %s " \ + "<a href=\"" SC_I18N_GIT_URL "/src/branch/master/%s#L%lu\">%s()@%s:%lu</a>: %s</div>" +#define SC_HLA i, \ + sc_log_str(c->logentries[i]->type), \ + sc_log_str(c->logentries[i]->type), \ + formatted_time, \ + c->logentries[i]->file, \ + c->logentries[i]->line, \ + c->logentries[i]->function, /* compile-time burned in values are safe from xss :) */ \ + c->logentries[i]->file, \ + c->logentries[i]->line, \ + safemessage /* ... whereas this might contain < */ + struct tm tm; + char formatted_time[128]; + localtime_r(&c->logentries[i]->time, &tm); + strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm); + char * safemessage = htmlspecialchars(c->logentries[i]->message); + size_t ws = snprintf(NULL, 0, SC_HLF, SC_HLA); + SC_HRC(html, ws); + html_written += sprintf(html+html_written, SC_HLF, SC_HLA); + free(safemessage); + } + pthread_rwlock_unlock(c->logentries_lock); + return html; +} +int sc_httpd (void * cls, + struct MHD_Connection * connection, + const char * url, + const char * method, + const char * version, + const char * upload_data, + size_t * upload_data_size, + void ** ptr) { + struct sc_cache * c = (struct sc_cache *) cls; + static int dummy; + struct MHD_Response * httpd_response; + int ret; + if (0 != strcmp(method, "GET")) + return MHD_NO; /* unexpected method */ + if (&dummy != *ptr) { + /* the first time only the headers are valid, do not respond in the first round ... */ + *ptr = &dummy; + return MHD_YES; + } + if (0 != *upload_data_size) + return MHD_NO; /* upload data in a GET?! */ + *ptr = NULL; /* clear context pointer */ + char * response = NULL; + enum MHD_ResponseMemoryMode mhdrmm = MHD_RESPMEM_MUST_FREE; + const char * query = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "q"); + const char * host = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "Host"); + char * location = "//git.sijanec.eu/sijanec/sear.c"; + char * content_type = "text/html"; + int status_code = MHD_HTTP_OK; + if (!host) + host = ""; + struct sc_query * q = NULL; + if (!query) { + if (url[0] == '/') + switch (url[1]) { + case 's': /* security.txt */ + case '.': /* .well-known/security.txt */ + mhdrmm = MHD_RESPMEM_PERSISTENT; + response = sc_securitytxt; + content_type = "text/plain"; + break; + case 'r': /* robots.txt */ + mhdrmm = MHD_RESPMEM_PERSISTENT; + response = sc_robotstxt; + content_type = "text/plain"; + break; + case 'o': /* osdd.xml - opensearch description document */ + response = malloc(strlen(sc_osdd)+strlen(host)); + sprintf(response, sc_osdd, host); + content_type = "application/opensearchdescription+xml"; + break; + case 'l': /* logs.html */ + { + char * logshtml = sc_logshtml(c); + response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_LOGS)+strlen(logshtml ? logshtml : SC_I18N_LOGS_ERROR)); + sprintf(response, (char *) sc_hp, "", "", SC_I18N_LOGS, logshtml ? logshtml : SC_I18N_LOGS_ERROR); + free(logshtml); + } + break; + } + if (!response) { + response = malloc(strlen((char *) sc_hp)+strlen(SC_I18N_HP_HEADING)+strlen(SC_I18N_HP_BODY)); + sprintf(response, (char *) sc_hp, "", "", SC_I18N_HP_HEADING, SC_I18N_HP_BODY); + } + } else { + int already_retried = 0; +retry: + SC_CRLE(c, c->queries_lock); + for (size_t i = 0; i < c->queries_length; i++) + if (!strcmp(c->queries[i]->string, query)) + q = c->queries[i]; + if (q) { + response = sc_queryhtml(q); /* MHD_create_response_from_buffer will free response (; */ + if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f") && q->results_length > 0) { + status_code = 307; + location = q->results[0]->url; + } + SC_CUE(c, c->queries_lock); + } else { + SC_CUE(c, c->queries_lock); + sc_query_google(query, c, NULL); + if (already_retried++) { + char * safequery = htmlspecialchars(query); + response = malloc(strlen((char*) sc_hp)+strlen(safequery)*2+strlen(SC_I18N_HP_ERROR_HEADING)+strlen(SC_I18N_HP_ERROR_BODY)); + sprintf(response, (char *) sc_hp, safequery, safequery, SC_I18N_HP_ERROR_HEADING, SC_I18N_HP_ERROR_BODY); + free(safequery); + } else goto retry; + } + } + httpd_response = MHD_create_response_from_buffer (strlen(response), (void *) response, mhdrmm); + MHD_add_response_header(httpd_response, "Content-Type", content_type); + if (status_code >= 300 && status_code <= 399) + MHD_add_response_header(httpd_response, "Location", location); + ret = MHD_queue_response(connection, status_code, httpd_response); + MHD_destroy_response(httpd_response); + return ret; +} @@ -0,0 +1,18 @@ +#define SC_I18N_NO_TITLE "ni naslova" +#define SC_I18N_NO_HREFLINK "/? ni hiperpovezave" +#define SC_I18N_NO_DESCRIPTION "ni opisa" +#define SC_I18N_HP_HEADING "dobrodošli na prvo stran <code>sear.c</code>" +#define SC_I18N_HP_BODY "<code>sear.c</code> je program za anonimizacijo in predpomnenje rezultatov spletnih iskalnikov. " \ + "Za uporabo nekaj vnesite v iskalno vrstico zgoraj in pritisnite gumb za iskanje." +#define SC_I18N_NUMBER_OF_RESULTS "število zadetkov" +#define SC_I18N_QUERY_TIME "čas poizvedbe" +#define SC_I18N_DATETIME_FORMAT "%c" +#define SC_I18N_UNLOCKING "odklepanje" +#define SC_I18N_LOCKING "zaklepanje" +#define SC_I18N_FAILED "ni uspelo" +#define SC_I18N_HP_ERROR_HEADING "napaka!" +#define SC_I18N_HP_ERROR_BODY "Pri pridobivanju rezultatov je api klic odvrnil s kodo, ki označuje neuspelo stanje. " \ + "Preberite <a href=/logs.html>dnevniške zapise</a>." +#define SC_I18N_LOGS "dnevniški zapisi" +#define SC_I18N_LOGS_ERROR "napaka pri branju dnevniških datotek" +#define SC_I18N_GIT_URL "//git.sijanec.eu/sijanec/sear.c" @@ -1,4 +1,4 @@ -static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) { +htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) { if (!b) b = ""; htmlParserCtxtPtr parser_context = htmlNewParserCtxt(); @@ -6,28 +6,112 @@ static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url * htmlFreeParserCtxt(parser_context); return document; } -static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) { +xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) { xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document); xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx); + if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { + xmlXPathFreeContext(xpath_ctx); + xmlXPathFreeObject(nodes); + return NULL; + } + xmlXPathFreeContext(xpath_ctx); + return nodes; +} +xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) { + xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc); + xmlXPathSetContextNode(node, xpath_ctx); + xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx); + if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { + xmlXPathFreeContext(xpath_ctx); + xmlXPathFreeObject(nodes); + return NULL; + } xmlXPathFreeContext(xpath_ctx); return nodes; } -typedef void (*node_function_t)(xmlNodePtr node, void * data); -static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) { +typedef void (*node_function_t) (xmlNodePtr node, void * data); +void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* you can instead use EACHNODE macro */ xmlNodeSetPtr nodeset = nodes->nodesetval; int i, size = nodeset->nodeNr; for (i = 0; i < size; i++) { xmlNodePtr cur; - cur = (xmlNodePtr)nodeset->nodeTab[i]; + cur = (xmlNodePtr) nodeset->nodeTab[i]; f(cur, data); } } -void printLinkNode(xmlNodePtr node, void * data) { +void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) { + xmlXPathObjectPtr nodes = findNodes(doc, xpath); + if (!nodes) + return; + eachNode(nodes, f, data); + xmlXPathFreeObject(nodes); +} +xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) { + xmlXPathObjectPtr nodes = findNodesN(node, xpath); + if (!nodes) + return NULL; + xmlNodeSetPtr nodeset = nodes->nodesetval; + int size = nodeset->nodeNr; + if (size <= n) + return NULL; + xmlNodePtr toreturn = (xmlNodePtr) nodeset->nodeTab[n]; + xmlXPathFreeObject(nodes); + return toreturn; +} +#define EACHNODE(node, nodes) /* you can instead use eachNodeX with anonymous function - no need to free and findnodes separatl */ \ + for (int EACHNODE_i = 0; \ + nodes ? nodes->nodesetval ? \ + ((EACHNODE_i < nodes->nodesetval->nodeNr) && (node = (xmlNodePtr)nodes->nodesetval->nodeTab[EACHNODE_i])) \ + : 0 : 0; \ + EACHNODE_i++) +/* // to ne dela +#define EACHNODEX(node, target, xpath) \ + xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \ + for (size_t EACHNODEX_i = 0; \ + EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \ + ? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \ + && (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \ + : xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \ + : 0 : 0; \ + EACHNODEX_i++) +*/ +void printNode (xmlNodePtr node, void * data) { + if (data){} if (node->type == XML_ELEMENT_NODE) { - xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href"); - if (href) { - printf("-> Link to '%s'\n", xmlGetProp(node, BAD_CAST "href")); + printf("-> content: '%s'\n", (char *) xmlNodeGetContent(node)); + } +} +#define gnu_code_start \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wpedantic\"") +#define gnu_code_end \ + _Pragma ("GCC diagnostic pop") +/* this is the definition of the anonymous function - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC */ +#define lambda(l_ret_type, l_arguments, l_body) \ + ({ \ + l_ret_type l_anonymous_functions_name l_arguments \ + l_body \ + &l_anonymous_functions_name; \ + }) +char * htmlspecialchars (const char * i) { /* remember to free the output */ + size_t s = 128; + char * o = malloc(s); + size_t w = 0; + for (; *i; i++) { + if (s - w <= 10) + o = realloc(o, (s *= 1.5)); + switch (*i) { + case '<': + w += sprintf(o+w, "<"); + break; + case '"': + w += sprintf(o+w, """); + break; + default: + o[w++] = *i; + break; } } + o[w++] = '\0'; + return o; } - @@ -31,7 +31,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f return -2; if (pthread_rwlock_wrlock(lock)) return -3; - if (c->logentries_sizeof - c->logentries_length != 0) + if (c->logentries_sizeof <= c->logentries_length) SC_BIGGER_ARRAY(c->logentries, sc_logentry); c->logentries_length++; size_t strlenm = strlen(m); @@ -53,6 +53,7 @@ int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f SC_PLL->line = l; SC_PLL->function = ca; SC_PLL->time = time(NULL); + SC_PLL->type = t; fprintf(stderr, "[sear.c] %s %s()@%s:%lu: %s\n", sc_log_str(t), ca, f, l, SC_PLL->message); /* in posix, this is thread safe */ if (lock && pthread_rwlock_unlock(lock)) return -4; @@ -1,3 +1,4 @@ +#define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -16,11 +17,25 @@ #include <libxml/HTMLtree.h> #include <libxml/tree.h> #include <libxml/xpath.h> +#include <microhttpd.h> #include <lib.c> #include <url.c> unsigned char sc_hp[] = { /* html page null terminated format string, from file src/hp.html */ #include <hp.xxd> }; +char sc_osdd[] = { /* xml document for opensearch */ +#include <osdd.xxd> +}; +char sc_robotstxt[] = "User-Agent: *\nDisallow: /\n"; +char sc_securitytxt[] = "# This content information is provided by the developer of this opensource application.\n" + "# The developer is not responsible for the actions of his software\n" + "# This website IS NOT operated by the developer. Do not use the contact information below in hopes of contacting the webmaster\n" + "# The following contact information is provided for reporting security bugs regarding the software, not for legal issues\n" + "Contact: https://www.sijanec.eu/o.html#kontakt\n" + "Acknowledgments: https://git.sijanec.eu/sijanec/sear.c\n" + "Encryption: https://www.sijanec.eu/pgp-key.txt\n" + "Expires: Thu, 31 Dec 2021 18:37:07 -0800\n" + "Preferred-Languages: sl, en, de, hr\n"; #define SC_HTTP_PORT 7327 /* SEAR on mobile keyboard */ #define SC_HTTP_RBUFSIZE 4096 /* initial size of http read buffer, increasning by K */ #define SC_HTTP_USER_AGENT "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" /* so google and others sends a minimal response */ @@ -28,16 +43,25 @@ unsigned char sc_hp[] = { /* html page null terminated format string, from file #include <structs.c> #include <log.c> #include <api.c> +#include <httpd.c> /* this is new in my programs. I am now using _sizeof for the actual alloced size of the array and _length for the count of elements in array. this is done to decrease number of calls to realloc&amis */ int main (int argc, char ** argv) { int rs = 0; struct sc_cache * c = sc_cache_init(); + struct MHD_Daemon * d; if (!c) { rs = 1; goto rc; } - sc_query_google("slovenia", c); + d = MHD_start_daemon(MHD_USE_THREAD_PER_CONNECTION, SC_HTTP_PORT, NULL, NULL, &sc_httpd, c, MHD_OPTION_END); + if (!d) { + rs = 2; + goto rc; + } + /* sc_query_google(argv[1], c); */ + getc(stdin); rc: sc_cache_free(c); + MHD_stop_daemon(d); return rs; } diff --git a/src/osdd.xml b/src/osdd.xml new file mode 100644 index 0000000..9522910 --- /dev/null +++ b/src/osdd.xml @@ -0,0 +1,8 @@ +<?xml version="1.0"?> + <!-- this file is a format string, so make sure it does not exceed ~4000 characters and it has percents escaped with + percent percent. --> +<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" > + <ShortName>sear.c</ShortName> + <Description>sear.c</Description> + <Url type="text/html" method="get" template="http://%s/?q={searchTerms}" /> +</OpenSearchDescription> diff --git a/src/structs.c b/src/structs.c index fa4228c..b99f1eb 100644 --- a/src/structs.c +++ b/src/structs.c @@ -33,8 +33,7 @@ struct sc_result { char * url; /* yesfree */ char * desc; /* yesfree */ char * title; /* yesfree */ - time_t date; /* some search engines like to extract a date from a website, store that here */ - char * html; /* yesfree - cached generated html output of said result or NULL before it's created */ + time_t date; /* some search engines like to extract a date from a website, store that here - not implemented */ unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implementd */ unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */ }; @@ -48,7 +47,6 @@ int sc_result_free (struct sc_result * r) { free(r->url); free(r->desc); free(r->title); - free(r->html); free(r); return 1; } @@ -58,7 +56,6 @@ struct sc_query { char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */ time_t lookup_time; /* time of last lookup */ unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */ - char * html; /* yesfree - cached generated HTML output of the result or NULL before it's created */ }; struct sc_query * sc_query_init () { struct sc_query * q = calloc(1, sizeof(struct sc_query)); @@ -74,7 +71,6 @@ int sc_query_free (struct sc_query * q) { if (!q) return -1; free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */ - free(q->html); /* setting to NULL here is not necessary, as we'll never use this query struct again */ for (size_t i = 0; i < q->results_sizeof; i++) sc_result_free(q->results[i]); free(q); @@ -1,5 +1,5 @@ -int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) { - size_t written = 0; +int urlencode (char * o, const char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) { + size_t written = 0; /* o CANNOT be equal to i, unlike in urldecode */ for (; *i; i++) { if (isalnum(*i) || *i == '.' || *i == '_' || *i == '-' || *i == '~') { o[written++] = *i; @@ -8,10 +8,11 @@ int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of written += 3; } } + o[written++] = '\0'; return 1; } -int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memory allocated */) { - size_t written = 0; +int urldecode (char * o, const char * i /* o must have at least strlen(i)+1 bytes memory allocated */) { + size_t written = 0; /* o can be equal to i for decoding in-place */ char buf[] = "00"; for (; *i; i++) { if (*i == '%') { @@ -26,5 +27,6 @@ int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memo o[written++] = *i; } } + o[written++] = '\0'; return 1; } |