summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsijanec <anton@sijanec.eu>2021-04-01 23:30:37 +0200
committersijanec <anton@sijanec.eu>2021-04-01 23:30:37 +0200
commit579048eaf89784ec1da8592d96311fafd49aea1a (patch)
tree61bf0c50c656f2b16ed8901ec3b07fb468ffb916
downloadsear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.gz
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.bz2
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.lz
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.xz
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.tar.zst
sear.c-579048eaf89784ec1da8592d96311fafd49aea1a.zip
-rw-r--r--.gitignore3
-rw-r--r--Makefile27
-rw-r--r--README.md40
-rw-r--r--src/api.c163
-rw-r--r--src/hp.html49
-rw-r--r--src/i18n.h0
-rw-r--r--src/lib.c33
-rw-r--r--src/log.c60
-rw-r--r--src/main.c43
-rw-r--r--src/structs.c118
-rw-r--r--src/url.c30
-rw-r--r--test/nanohttp.c32
12 files changed, 598 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a46c53
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+sear.c
+tmp/
+valgrind-out.txt
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a5f5a31
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,27 @@
+# Build script for sear.c. The resulting executable is itself named "sear.c".
+DESTDIR=/
+
+# default: converts src/hp.html into a C array initializer (tmp/hp.xxd), appends a trailing
+# ", 0" element so the embedded page is NUL terminated, then compiles src/main.c.
+default:
+ mkdir tmp -p
+ xxd -i < src/hp.html > tmp/hp.xxd
+ echo ', 0' >> tmp/hp.xxd
+ gcc -Wall -pedantic -g -Isrc -Itmp -pthread src/main.c $$(xml2-config --libs --cflags) -lmicrohttpd -osear.c
+
+# install: copies the built executable into $(DESTDIR)/usr/bin/.
+install:
+ mkdir -p $(DESTDIR)/usr/bin/
+ cp sear.c $(DESTDIR)/usr/bin/
+
+# distclean and clean both remove the executable and the tmp/ directory.
+distclean:
+ rm sear.c tmp -rf
+
+clean:
+ rm sear.c tmp -rf
+
+# prepare: installs the build dependencies with apt (requires sudo).
+prepare:
+ sudo apt install libmicrohttpd-dev xxd build-essential libxml2-dev -y
+
+# test-http: builds the standalone nanohttp test client into tmp/nanohttp.
+test-http:
+ mkdir -p tmp
+ gcc -Wall -pedantic -g -Isrc -Itmp test/nanohttp.c $$(xml2-config --libs --cflags) -otmp/nanohttp
+
+# test-http-valgrind: runs tmp/nanohttp under valgrind; it does not build it, so run
+# "make test-http" first.
+test-http-valgrind:
+ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --log-file=valgrind-out.txt tmp/nanohttp http://sijanec.eu/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b7bbb81
--- /dev/null
+++ b/README.md
@@ -0,0 +1,40 @@
+# sear.c
+
+**not implemented! check back again in a couple of days (:**
+
+sear.c is used as a lightweight replacement for [SearX](//en.wikipedia.org/wiki/Searx) that proxies and caches search results from
+the Google web search engine. The main advantages over SearX are speed and simplicity.
+
+## instructions for debian and ubuntu systems
+
+First add my software distribution repository [prog.sijanec.eu](//prog.sijanec.eu) into your APT sources list.
+
+```
+apt install sear.c
+service sear.c start
+```
+
+## requirements
+
+* a POSIX system
+* GNU C library
+* GNU compiler collection (it's written in GNU C - it uses anonymous functions)
+* GNU Make
+* libxml2-dev (for the simple HTML/1.0 client and HTML parser)
+* libmicrohttpd-dev (for serving results - use a reverse proxy, such as nginx, for HTTPS)
+* xxd (for converting HTML pages into C arrays when compiling from source)
+
+## compiling from source
+
+```
+make prepare
+make
+```
+
+## a word about public instances
+
+It's not recommended to run a public instance of sear.c yet, as google likes to prevent spam by using a captcha.
+
+It's expected that I'll write a simple captcha forwarder so that users of public instances will solve captchas that are presented and keep the instance healthy.
+
+Currently it's recommended to run private instances, protected by a password using a reverse HTTP proxy.
diff --git a/src/api.c b/src/api.c
new file mode 100644
index 0000000..ae8d619
--- /dev/null
+++ b/src/api.c
@@ -0,0 +1,163 @@
+/*
+ * Convenience wrappers around sc_api() and sc_capix() that fill in the isfmt argument:
+ * 0##__VA_OPT__(1) expands to 01 when variadic arguments follow the endpoint and to 0 when
+ * there are none, so the endpoint is only treated as a printf format when arguments are given.
+ * c: cache (used for logging), b: request body or NULL, h: extra headers or NULL, e: endpoint.
+ */
+#define SC_CAPI(c, b, h, e, ...) sc_api(c, b, h, 0##__VA_OPT__(1), e __VA_OPT__(,) __VA_ARGS__)
+#define SC_CAPIX(c, b, h, e, ...) sc_capix(c, b, h, 0##__VA_OPT__(1), e __VA_OPT__(,) __VA_ARGS__)
+/*
+ * Performs an HTTP request with libxml2's nanohttp client and returns the response body.
+ *
+ * c:        cache, used for logging; must not be NULL
+ * body:     request body; when non-NULL the request is a POST, otherwise a GET
+ * headers:  extra request headers appended to SC_HTTP_HEADERS, or NULL for none
+ * isfmt:    when nonzero and endpoint contains conversions, endpoint is a printf format
+ *           that is expanded with the variadic arguments
+ * endpoint: URL (or URL format) to request; must not be NULL
+ *
+ * Returns a malloc'ed, NUL terminated buffer that the caller must free, or NULL when the
+ * arguments are invalid, memory runs out or the request could not be started. A response
+ * code outside 2xx and a read error are logged, but whatever was read is still returned.
+ */
+char * sc_api (struct sc_cache * c, char * body, char * headers, int isfmt, char * endpoint, ...) {
+	if (!c || !endpoint)
+		return NULL;
+	char * endpoint_formatted = NULL;
+	char * hedrs = NULL;
+	char * contentType = NULL;
+	char * redir = NULL;
+	char * buf = NULL;
+	char * url = NULL;
+	void * r = NULL;
+	size_t buf_sizeof = SC_HTTP_RBUFSIZE;
+	size_t buf_length = 0;
+	long response_code = 0;
+	int readstatus = 0;
+	int failed = 1; /* cleared once the request has been started successfully */
+	if (isfmt && parse_printf_format(endpoint, 0, NULL) > 0) {
+		va_list ap, ap2;
+		va_start(ap, endpoint);
+		va_copy(ap2, ap);
+		int strlenm = vsnprintf(NULL, 0, endpoint, ap); /* negative on encoding errors */
+		if (strlenm >= 0 && (endpoint_formatted = malloc((size_t) strlenm+1)))
+			vsnprintf(endpoint_formatted, (size_t) strlenm+1, endpoint, ap2);
+		va_end(ap);
+		va_end(ap2);
+		if (!endpoint_formatted) {
+			SC_LOG(SC_LOG_ERROR, c, "formatting failed, endpoint: %s", endpoint);
+			return NULL;
+		}
+	}
+	url = endpoint_formatted ? endpoint_formatted : endpoint;
+	if (!headers)
+		headers = "";
+	hedrs = malloc(strlen(SC_HTTP_HEADERS)+strlen(headers)+1);
+	buf = malloc(buf_sizeof);
+	if (!hedrs || !buf) {
+		SC_LOG(SC_LOG_ERROR, c, "out of memory, endpoint: %s", url);
+		goto rc;
+	}
+	strcpy(hedrs, SC_HTTP_HEADERS);
+	strcat(hedrs, headers);
+	r = xmlNanoHTTPMethodRedir(
+			url,
+			body ? "POST" : "GET",
+			body,
+			&contentType,
+			&redir,
+			hedrs,
+			body ? (int) strlen(body) : 0
+			);
+	if (!r) {
+		SC_LOG(SC_LOG_ERROR, c, "!r, endpoint: %s", url);
+		goto rc;
+	}
+	failed = 0;
+	response_code = xmlNanoHTTPReturnCode(r);
+	if (response_code < 200 || response_code >= 300)
+		SC_LOG(SC_LOG_ERROR, c, "response_code == %ld, endpoint: %s", response_code, url);
+	/* one byte is always kept in reserve for the terminating NUL */
+	while ((readstatus = xmlNanoHTTPRead(r, buf+buf_length, (int) (buf_sizeof-buf_length-1))) > 0) {
+		buf_length += (size_t) readstatus;
+		if (buf_sizeof-buf_length < SC_HTTP_RBUFSIZE) {
+			size_t bigger_sizeof = buf_sizeof*SC_REALLOC_K;
+			char * bigger = realloc(buf, bigger_sizeof); /* buf stays valid on failure */
+			if (!bigger) {
+				SC_LOG(SC_LOG_ERROR, c, "out of memory, endpoint: %s", url);
+				failed = 1;
+				break;
+			}
+			buf = bigger;
+			buf_sizeof = bigger_sizeof;
+		}
+	}
+	if (readstatus == -1)
+		SC_LOG(SC_LOG_ERROR, c, "readstatus == -1, endpoint: %s", url);
+	buf[buf_length] = '\0'; /* callers treat the result as a C string */
+	xmlNanoHTTPClose(r);
+	SC_LOG(SC_LOG_DEBUG, c, "contentType = %s, redir = %s", contentType ? contentType : "NULL", redir ? redir : "NULL");
+rc:
+	if (failed) {
+		free(buf);
+		buf = NULL;
+	}
+	free(endpoint_formatted);
+	free(contentType);
+	free(redir);
+	free(hedrs);
+	return buf;
+}
+/*
+ * Like sc_api(), but parses the response body as HTML and returns the document.
+ *
+ * The parameters have the same meaning as in sc_api(). The endpoint is expanded here when
+ * isfmt is nonzero and then handed to sc_api() with isfmt 0, so that it is not expanded twice.
+ * The expanded endpoint is also used as the base URL of the parsed document.
+ *
+ * Returns the parsed document, which the caller must release with xmlFreeDoc(), or NULL when
+ * the arguments are invalid, memory runs out, the request fails or parsing fails.
+ */
+htmlDocPtr sc_capix (struct sc_cache * c, char * body, char * headers, int isfmt, char * endpoint, ...) {
+	if (!c || !endpoint)
+		return NULL;
+	char * endpoint_formatted = NULL;
+	if (isfmt && parse_printf_format(endpoint, 0, NULL) > 0) {
+		va_list ap, ap2;
+		va_start(ap, endpoint);
+		va_copy(ap2, ap);
+		int strlenm = vsnprintf(NULL, 0, endpoint, ap); /* negative on encoding errors */
+		if (strlenm >= 0 && (endpoint_formatted = malloc((size_t) strlenm+1)))
+			vsnprintf(endpoint_formatted, (size_t) strlenm+1, endpoint, ap2);
+		va_end(ap);
+		va_end(ap2);
+		if (!endpoint_formatted)
+			return NULL;
+	}
+	char * url = endpoint_formatted ? endpoint_formatted : endpoint;
+	htmlDocPtr htmldoc = NULL;
+	char * buf = sc_api(c, body, headers, 0, url);
+	if (buf) /* parseHtmlDocument() calls strlen() on its input, so never hand it NULL */
+		htmldoc = parseHtmlDocument(buf, url);
+	free(buf);
+	free(endpoint_formatted);
+	return htmldoc;
+}
+/*
+ * Finds the name of a CSS class in a stylesheet text by its definition.
+ *
+ * The first occurrence of definition in haystack is located; from there the text is searched
+ * backwards for the nearest '.', and the run of alphanumeric characters following that dot is
+ * returned as the class name.
+ *
+ * Returns a malloc'ed, NUL terminated copy of the class name that the caller must free, or NULL
+ * when an argument is NULL, the definition does not occur, no '.' precedes it, the alphanumeric
+ * run extends to the end of haystack, or memory runs out.
+ */
+char * sc_find_class (char * haystack, const char * definition) {
+	if (!haystack || !definition)
+		return NULL;
+	char * class = strstr(haystack, definition);
+	if (!class)
+		return NULL;
+	while (class > haystack && class[-1] != '.') /* walk back to just after the nearest dot */
+		class--;
+	if (class == haystack) /* reached the start without seeing a dot */
+		return NULL;
+	char * endofclass = class; /* google only has alphanumeric class names. TODO: be pedantic and conform to w3 standards */
+	while (isalnum((unsigned char) *endofclass)) /* ctype functions require unsigned char values */
+		endofclass++;
+	if (!*endofclass) /* the name ran into the end of the text without a delimiter */
+		return NULL;
+	size_t length = (size_t) (endofclass-class);
+	char * toreturn = malloc(length+1);
+	if (!toreturn)
+		return NULL;
+	memcpy(toreturn, class, length);
+	toreturn[length] = '\0';
+	return toreturn;
+}
+/*
+ * Queries the Google WAP search page for s and prints the links of the result titles.
+ *
+ * s: query string (not yet URL encoded); c: cache, used for logging.
+ *
+ * Returns 1 on success, -1 on NULL arguments, -2 when no response body was obtained, -3 when
+ * the title class could not be located in the response, -4 when memory runs out or the
+ * response cannot be parsed or searched.
+ */
+int sc_query_google (char * s, struct sc_cache * c) {
+	/*
+		remarks:
+		* we are using wap.google.com over HTTP and with a user-agent string of a nokia mobile phone, so we get a lite website
+		* we determine which class holds a specific value by looking at the css definitions
+			- result title: the only class that has definition {color:#1967D2;font-size:14px;line-height:16px}
+				+ A links have this class set, but they have a child SPAN element that then holds the text of the title
+				+ A href points to a tracking relative link, starting with /url?q=. the q parameter contains the (obv urlencoded) link.
+			- result date: class has only one definition, {color:#70757a}, but same definition has the class for the settings A link.
+				+ extract those two classes and find the one that is only present on SPAN text elements.
+			- result description: once we have the result div, the description is the // span with the appropriate class
+				+ the appropriate class is the only one with {word-break:break-word}. note that this class also describes other elements.
+			- result div: to get the result div, we need the parent of the parent of the A link of the title.
+		* result dates are sometimes relative ("an hour ago") and heavily depend on the client location, based on IP.
+			- we won't parse those yet
+		* I couldn't find anything with ratings, so we won't parse those either yet
+		* captcha: google knows that this nokia phone we're pretending to be doesn't support javascript
+			- the request limiting captcha must work on a phone without javascript. it is probably loaded inside an iframe, but has
+				origin protection, so we can't just solve it client-side. we would have to proxy images and create some sort of a session
+				based http request-response based user interface so we can ask the user to complete the captcha. this is not yet
+				implemented and will be hard work.
+	*/
+	if (!s || !c)
+		return -1;
+	/* everything released at rc: is initialised here, so that every goto rc is safe */
+	int rs = 1;
+	char * txtdoc = NULL;
+	char * titleclass = NULL;
+	char * xpath = NULL;
+	htmlDocPtr xmldoc = NULL;
+	xmlXPathObjectPtr nodes = NULL;
+	char * us = calloc(strlen(s)*3+1, sizeof(char)); /* zeroed, so the encoded string is always terminated */
+	if (!us) {
+		rs = -4;
+		goto rc;
+	}
+	urlencode(us, s);
+	txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us);
+	free(us);
+	if (!txtdoc) {
+		rs = -2;
+		goto rc;
+	}
+	titleclass = sc_find_class(txtdoc, "{color:#1967D2;font-size:14px;line-height:16px}");
+	if (!titleclass) {
+		SC_LOG(SC_LOG_ERROR, c, "!titleclass");
+		rs = -3;
+		goto rc;
+	}
+#define SC_GTXF "/html/body//a[contains(@class, '%s')]" // @class='fuLhoc ZWRArf'"
+	size_t xpath_sizeof = strlen(titleclass)+strlen(SC_GTXF)+1;
+	xpath = malloc(xpath_sizeof);
+	if (!xpath) {
+		rs = -4;
+		goto rc;
+	}
+	snprintf(xpath, xpath_sizeof, SC_GTXF, titleclass); /* matches every A element whose class attribute contains titleclass */
+	fprintf(stdout, "%s\n", xpath);
+	xmldoc = parseHtmlDocument(txtdoc, NULL);
+	if (!xmldoc) {
+		SC_LOG(SC_LOG_ERROR, c, "!xmldoc");
+		rs = -4;
+		goto rc;
+	}
+	nodes = findNodes(xmldoc, xpath);
+	if (!nodes) {
+		SC_LOG(SC_LOG_ERROR, c, "!nodes");
+		rs = -4;
+		goto rc;
+	}
+	if (nodes->nodesetval) /* eachNode() dereferences the node set */
+		eachNode(nodes, printLinkNode, NULL);
+rc:
+	if (nodes)
+		xmlXPathFreeObject(nodes);
+	if (xmldoc)
+		xmlFreeDoc(xmldoc);
+	free(txtdoc);
+	free(titleclass);
+	free(xpath);
+	return rs;
+}
diff --git a/src/hp.html b/src/hp.html
new file mode 100644
index 0000000..64da49d
--- /dev/null
+++ b/src/hp.html
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang=sl>
+ <!-- this file is a printf format. be sure to escape percent signs with percent percent. -->
+ <!-- this format requires the following types (in order): query string, query string, result info string, results html string -->
+ <head>
+ <meta charset=UTF-8 />
+ <title>
+ %s :: sear.c
+ </title>
+ <link rel=stylesheet href=//sijanec.eu/assets/css/styles.css?ref=sear.c /> <!-- TODO: direktno vstavljanje v dokument -->
+ <link rel="shortcut icon" href="data:image/x-icon;," type="image/x-icon"> <!-- prevents favicon lookups -->
+ <link rel="icon" href="data:;base64,iVBORw0KGgo=">
+ <style>
+ input[type=password], input[type=text], input[type=submit], input[type=button] {
+ width: 100%%;
+ height: 1,5cm;
+ font-size: 18;
+ }
+ input .125 {
+ width: 125%%;
+ }
+ input .50 {
+ width: 50%%;
+ }
+ .result:hover {
+ background: var(--bgc2);
+ }
+ </style>
+ </head>
+ <body>
+ <form>
+ <input type=text name=q class=50 value="{{ query }}" placeholder="sear.c ..." />
+ <input type=submit class=125 value=🔍 /> <!-- magnifying glass emoji -->
+ <input type=submit class=125 name=f value=Ʊ /> <!-- horseshoe unicode character -->
+ <input type=submit class=125 name=i value=🖼 /> <!-- framed picture emoji -->
+ <input type=submit class=125 name=v value=🎬 /> <!-- that thing they use in movies to denote start of a scene emoji -->
+ </form>
+ <h3>
+ %s
+ </h3>
+ %s
+ <hr>
+ <h4 align=center>
+ <a href=//git.sijanec.eu/sijanec/sear.c >
+ sear.c
+ </a>
+ </h4>
+ </body>
+</html>
diff --git a/src/i18n.h b/src/i18n.h
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/i18n.h
diff --git a/src/lib.c b/src/lib.c
new file mode 100644
index 0000000..2c3e34a
--- /dev/null
+++ b/src/lib.c
@@ -0,0 +1,33 @@
+/*
+ * Parses the NUL terminated HTML text d into a libxml2 document, recovering from malformed
+ * markup and suppressing parser warnings and errors.
+ *
+ * d: HTML text; b: base URL of the document, NULL is treated as "".
+ *
+ * Returns the document, which the caller must release with xmlFreeDoc(), or NULL when d is
+ * NULL, the parser context cannot be created or parsing fails.
+ */
+static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
+	if (!d)
+		return NULL;
+	if (!b)
+		b = "";
+	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
+	if (!parser_context)
+		return NULL;
+	htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, strlen(d), b, NULL /* encoding */, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER);
+	htmlFreeParserCtxt(parser_context);
+	return document;
+}
+/*
+ * Evaluates the XPath expression xpath_query against document in a temporary XPath context
+ * and returns whatever xmlXPathEvalExpression() produced. The context is released before
+ * returning.
+ */
+static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
+	xmlXPathContextPtr context = xmlXPathNewContext(document);
+	xmlXPathObjectPtr result = xmlXPathEvalExpression(BAD_CAST xpath_query, context);
+	xmlXPathFreeContext(context);
+	return result;
+}
+/* callback invoked by eachNode() for every node; data is passed through unchanged */
+typedef void (*node_function_t)(xmlNodePtr node, void * data);
+/*
+ * Calls f(node, data) for every node of the node set held by the XPath result nodes, in the
+ * order of the set. Does nothing when nodes, its node set or f is NULL.
+ */
+static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+	if (!nodes || !nodes->nodesetval || !f)
+		return;
+	xmlNodeSetPtr nodeset = nodes->nodesetval;
+	for (int i = 0; i < nodeset->nodeNr; i++)
+		f((xmlNodePtr) nodeset->nodeTab[i], data);
+}
+/*
+ * node_function_t callback that prints the href attribute of an element node to stdout.
+ * Nodes that are NULL, are not elements or have no href attribute are ignored. data is unused.
+ */
+void printLinkNode(xmlNodePtr node, void * data) {
+	(void) data;
+	if (!node || node->type != XML_ELEMENT_NODE)
+		return;
+	xmlChar * href = xmlGetProp(node, BAD_CAST "href"); /* a copy that we own, NULL when absent */
+	if (!href)
+		return;
+	printf("-> Link to '%s'\n", href);
+	xmlFree(href);
+}
+
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..d229512
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,60 @@
+/*
+ * Maps a log type (SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO or SC_LOG_DEBUG) to the name of
+ * its constant. Any other value yields "SC_LOG_UNKNOWN". The returned string is static.
+ */
+const char * sc_log_str (int t) {
+	if (t == SC_LOG_ERROR)
+		return "SC_LOG_ERROR";
+	if (t == SC_LOG_WARNING)
+		return "SC_LOG_WARNING";
+	if (t == SC_LOG_INFO)
+		return "SC_LOG_INFO";
+	if (t == SC_LOG_DEBUG)
+		return "SC_LOG_DEBUG";
+	return "SC_LOG_UNKNOWN";
+}
+/*
+ * Releases a log entry together with its message. The file and function strings are not
+ * owned by the entry and are left alone.
+ * Returns 1 on success and -1 when l is NULL, like the other *_free functions.
+ */
+int sc_logentry_free (struct sc_logentry * l) {
+	if (!l)
+		return -1;
+	free(l->message);
+	l->message = NULL;
+	free(l);
+	return 1;
+}
+/* Allocates one zero-filled log entry; the result is whatever calloc() returned. */
+struct sc_logentry * sc_logentry_init () {
+	return calloc(1, sizeof(struct sc_logentry));
+}
+/*
+ * Appends an entry to the log of cache c and prints it to stderr. Normally called through
+ * the SC_LOG macro.
+ *
+ * t:   log type (SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO, SC_LOG_DEBUG)
+ * c:   cache that owns the log
+ * ca:  name of the calling function (stored, not copied)
+ * f:   source file of the caller (stored, not copied)
+ * l:   source line of the caller
+ * isf: when nonzero and m contains conversions, m is a printf format for the variadic arguments
+ * m:   message or message format; NULL is treated as an empty message
+ *
+ * Returns 1 on success, -1 when c is NULL, -2 when the cache has no log lock, -3 when locking
+ * fails, -4 when unlocking fails and -5 when memory runs out.
+ */
+int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f, size_t l, unsigned short int isf, char * m, ...) {
+#define SC_PLL c->logentries[c->logentries_length-1]
+	if (!c)
+		return -1;
+	pthread_rwlock_t * lock = c->logentries_lock;
+	if (!lock)
+		return -2;
+	if (!m)
+		m = "";
+	/* the message is built before the lock is taken to keep the locked section short */
+	char * message = NULL;
+	if (isf && parse_printf_format(m, 0, NULL) > 0) {
+		va_list ap, ap2;
+		va_start(ap, m);
+		va_copy(ap2, ap);
+		int strlenm = vsnprintf(NULL, 0, m, ap); /* negative on encoding errors */
+		if (strlenm >= 0 && (message = malloc((size_t) strlenm+1)))
+			vsnprintf(message, (size_t) strlenm+1, m, ap2);
+		va_end(ap);
+		va_end(ap2);
+	} else if ((message = malloc(strlen(m)+1))) {
+		strcpy(message, m);
+	}
+	if (!message)
+		return -5;
+	if (pthread_rwlock_wrlock(lock)) {
+		free(message);
+		return -3;
+	}
+	int rs = 1;
+	if (c->logentries_length >= c->logentries_sizeof) { /* grow only when the array is full */
+		size_t old_sizeof = c->logentries_sizeof;
+		size_t bigger_sizeof = old_sizeof*SC_REALLOC_K;
+		if (bigger_sizeof <= old_sizeof) /* K would not grow a tiny or empty array */
+			bigger_sizeof = old_sizeof+SC_ALLOC_CHUNK;
+		struct sc_logentry ** bigger = realloc(c->logentries, sizeof(bigger[0])*bigger_sizeof);
+		if (!bigger) {
+			rs = -5;
+			goto unlock;
+		}
+		for (size_t i = old_sizeof; i < bigger_sizeof; i++)
+			bigger[i] = sc_logentry_init();
+		c->logentries = bigger;
+		c->logentries_sizeof = bigger_sizeof;
+	}
+	if (!c->logentries[c->logentries_length]) /* the slot could not be preallocated earlier */
+		c->logentries[c->logentries_length] = sc_logentry_init();
+	if (!c->logentries[c->logentries_length]) {
+		rs = -5;
+		goto unlock;
+	}
+	c->logentries_length++;
+	free(SC_PLL->message);
+	SC_PLL->message = message;
+	message = NULL; /* now owned by the entry */
+	SC_PLL->type = t;
+	SC_PLL->file = f;
+	SC_PLL->line = l;
+	SC_PLL->function = ca;
+	SC_PLL->time = time(NULL);
+	fprintf(stderr, "[sear.c] %s %s()@%s:%zu: %s\n", sc_log_str(t), ca, f, l, SC_PLL->message); /* in posix, this is thread safe */
+unlock:
+	free(message);
+	if (pthread_rwlock_unlock(lock))
+		return -4;
+	return rs;
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..c42d5c0
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <printf.h>
+#include <ctype.h>
+#include <time.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <i18n.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <libxml/nanohttp.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+#include <libxml/tree.h>
+#include <libxml/xpath.h>
+#include <lib.c>
+#include <url.c>
+unsigned char sc_hp[] = { /* html page null terminated format string, from file src/hp.html */
+#include <hp.xxd>
+};
+/* HTTP settings; SC_HTTP_HEADERS is the header block sent with every request made by sc_api() */
+#define SC_HTTP_PORT 7327 /* SEAR on mobile keyboard */
+#define SC_HTTP_RBUFSIZE 4096 /* initial size of http read buffer, increasing by K (SC_REALLOC_K) */
+#define SC_HTTP_USER_AGENT "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" /* so google and others send a minimal response */
+#define SC_HTTP_HEADERS "User-Agent: " SC_HTTP_USER_AGENT "\r\n"
+#include <structs.c>
+#include <log.c>
+#include <api.c>
+/* this is new in my programs. I am now using _sizeof for the actual alloced size of the array and _length for the count of elements in array. this is done to decrease number of calls to realloc&amis */
+/*
+ * Entry point: creates a cache, runs one hard-coded query against it and releases the cache.
+ * Exits with 1 when the cache could not be created and with 0 otherwise; the result of the
+ * query does not influence the exit status.
+ */
+int main (int argc, char ** argv) {
+	struct sc_cache * cache = sc_cache_init();
+	int exit_status = cache ? 0 : 1;
+	if (cache)
+		sc_query_google("slovenia", cache);
+	sc_cache_free(cache); /* handles NULL by itself */
+	return exit_status;
+}
diff --git a/src/structs.c b/src/structs.c
new file mode 100644
index 0000000..fa4228c
--- /dev/null
+++ b/src/structs.c
@@ -0,0 +1,118 @@
+#define SC_ALLOC_CHUNK 128 /* how many x to alloc when allocing (for performance so we don't call malloc over and over again) */
+/* declares three struct members: the array of pointers itself, name_sizeof (allocated slots) and name_length (used slots) */
+#define SC_IN_STRUCT_ARRAY(type, name) _Atomic(type **) name; _Atomic(size_t) name##_sizeof; _Atomic(size_t) name##_length
+/*
+ * SC_CWLE / SC_CRLE / SC_CUE: write-lock, read-lock and unlock the rwlock `name`. Each evaluates
+ * to 0 on success; on failure the error is logged through SC_LOG and the expression evaluates to 1.
+ * NOTE(review): the SC_I18N_* strings are not defined anywhere in the visible sources (i18n.h is
+ * empty) - confirm before using these macros.
+ */
+#define SC_CWLE(c, name) (pthread_rwlock_wrlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0)
+#define SC_CRLE(c, name) (pthread_rwlock_rdlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0)
+#define SC_CUE(c, name) (pthread_rwlock_unlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_UNLOCKING " " #name " " SC_I18N_FAILED)||1):0)
+#define SC_REALLOC_K 1.5 /* constant to dynamically realloc large arrays (new size = current size * K) */
+#define SC_ENGINE_GOOGLE (1 << 0)
+/* _Atomic(size_t) sc_mem_max = 100e6; */ /* the really soft memory limit of the program: 100MB - NOT IMPLEMENTED */
+/*
+ * SC_LOG: logs message m (optionally a printf format followed by its arguments) of type t to
+ * cache c by calling sc_push_log() with the caller's function, file and line. The isf argument
+ * is 01 when variadic arguments are present and 0 otherwise.
+ */
+#define SC_LOG(t, c, m, ...) sc_push_log(t, c, __func__, __FILE__, __LINE__, 0##__VA_OPT__(1), m __VA_OPT__(,) __VA_ARGS__)
+/* log types; they are distinct bits */
+#define SC_LOG_ERROR (1 << 0)
+#define SC_LOG_WARNING (1 << 1)
+#define SC_LOG_INFO (1 << 2)
+#define SC_LOG_DEBUG (1 << 3)
+/*
+ * SC_BIGGER_ARRAY: grows the pointer array `name` (with its companion name_sizeof) to
+ * name_sizeof*SC_REALLOC_K slots and fills every new slot with type_init().
+ * NOTE(review): the result of realloc is not checked, and the expansion already ends in a
+ * semicolon, so the macro cannot be used as the body of an if that has an else branch.
+ */
+#define SC_BIGGER_ARRAY(name, type) do { \
+ name = realloc(name, sizeof(name[0])*name##_sizeof*SC_REALLOC_K); \
+ for (size_t i = name##_sizeof; i < name##_sizeof*SC_REALLOC_K; i++) \
+ name[i] = type##_init(); \
+ name##_sizeof = name##_sizeof*SC_REALLOC_K; \
+ } while (0);
+/* One log record. "nofree"/"yesfree" say whether the entry owns the pointed-to memory. */
+struct sc_logentry {
+ unsigned char type; /* SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO, SC_LOG_DEBUG */
+ size_t line; /* source line of the caller */
+ const char * function; /* nofree */
+ char * file; /* nofree */
+ char * message; /* yesfree */
+ time_t time; /* when the entry was pushed */
+};
+int sc_logentry_free (struct sc_logentry * l); /* defined in log.c */
+struct sc_logentry * sc_logentry_init (); /* defined in log.c */
+
+/* One search result. "nofree"/"yesfree" say whether sc_result_free() releases the member. */
+struct sc_result {
+ struct sc_query * query; /* nofree - free from sc_cache */
+ char * url; /* yesfree */
+ char * desc; /* yesfree */
+ char * title; /* yesfree */
+ time_t date; /* some search engines like to extract a date from a website, store that here */
+ char * html; /* yesfree - cached generated html output of said result or NULL before it's created */
+ unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implemented */
+ unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */
+};
+/* Allocates one zero-filled result; the return value is whatever calloc() returned. */
+struct sc_result * sc_result_init () {
+	return calloc(1, sizeof(struct sc_result));
+}
+/*
+ * Releases a result together with the strings it owns (url, desc, title, html).
+ * Returns 1 on success and -1 when r is NULL.
+ */
+int sc_result_free (struct sc_result * r) {
+	if (r == NULL)
+		return -1;
+	char * owned[] = { r->url, r->desc, r->title, r->html };
+	for (size_t i = 0; i < sizeof owned / sizeof owned[0]; i++)
+		free(owned[i]);
+	free(r);
+	return 1;
+}
+/* One cached query together with its results. */
+struct sc_query {
+ struct sc_cache * cache; /* nofree - what cache owns this query */
+ SC_IN_STRUCT_ARRAY(struct sc_result, results); /* yesfree */ /* expands to results, results_sizeof and results_length */
+ char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */
+ time_t lookup_time; /* time of last lookup */
+ unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */
+ char * html; /* yesfree - cached generated HTML output of the result or NULL before it's created */
+};
+/*
+ * Allocates a query with SC_ALLOC_CHUNK preallocated, zero-filled results, each pointing back
+ * at the query. Returns NULL when memory runs out; nothing is leaked in that case.
+ */
+struct sc_query * sc_query_init () {
+	struct sc_query * q = calloc(1, sizeof(struct sc_query));
+	if (!q)
+		return NULL;
+	q->results = calloc(SC_ALLOC_CHUNK, sizeof(struct sc_result *));
+	if (!q->results) {
+		free(q);
+		return NULL;
+	}
+	q->results_sizeof = SC_ALLOC_CHUNK;
+	for (size_t i = 0; i < q->results_sizeof; i++) {
+		q->results[i] = sc_result_init();
+		if (!q->results[i]) { /* undo everything allocated so far */
+			for (size_t j = 0; j < i; j++)
+				sc_result_free(q->results[j]);
+			free(q->results);
+			free(q);
+			return NULL;
+		}
+		q->results[i]->query = q;
+	}
+	return q;
+}
+/*
+ * Releases a query: its strings, every preallocated result and the array that holds them.
+ * Returns 1 on success and -1 when q is NULL.
+ */
+int sc_query_free (struct sc_query * q) {
+	if (!q)
+		return -1;
+	free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */
+	free(q->html); /* setting to NULL here is not necessary, as we'll never use this query struct again */
+	if (q->results)
+		for (size_t i = 0; i < q->results_sizeof; i++)
+			sc_result_free(q->results[i]); /* tolerates NULL slots */
+	free(q->results); /* the pointer array itself is owned by the query as well */
+	free(q);
+	return 1;
+}
+/* Top-level state: the cached queries and the log, each with a heap-allocated rwlock. */
+struct sc_cache {
+ SC_IN_STRUCT_ARRAY(struct sc_query, queries); /* yesfree */ /* expands to queries, queries_sizeof and queries_length */
+ pthread_rwlock_t * queries_lock;
+ SC_IN_STRUCT_ARRAY(struct sc_logentry, logentries); /* yesfree */ /* expands to logentries, logentries_sizeof and logentries_length */
+ pthread_rwlock_t * logentries_lock; /* taken for writing by sc_push_log() */
+};
+/*
+ * Allocates a cache with SC_ALLOC_CHUNK preallocated queries and log entries and initialises
+ * both rwlocks. Returns NULL when an allocation or a lock initialisation fails; everything
+ * allocated up to that point is released again.
+ */
+struct sc_cache * sc_cache_init() {
+/* evaluates to nonzero when the lock was allocated and initialised; leaves name_lock NULL otherwise */
+#define SC_CILI(name) ((name##_lock = malloc(sizeof(pthread_rwlock_t))) && (pthread_rwlock_init(name##_lock, NULL) == 0 || (free(name##_lock), name##_lock = NULL, 0)))
+	struct sc_cache * c = calloc(1, sizeof(struct sc_cache));
+	if (!c)
+		return NULL;
+	c->queries = calloc(SC_ALLOC_CHUNK, sizeof(struct sc_query *));
+	c->logentries = calloc(SC_ALLOC_CHUNK, sizeof(struct sc_logentry *));
+	if (!c->queries || !c->logentries)
+		goto fail;
+	c->queries_sizeof = SC_ALLOC_CHUNK;
+	c->logentries_sizeof = SC_ALLOC_CHUNK;
+	for (size_t i = 0; i < c->queries_sizeof; i++) {
+		c->queries[i] = sc_query_init();
+		if (!c->queries[i])
+			goto fail;
+		c->queries[i]->cache = c;
+	}
+	for (size_t i = 0; i < c->logentries_sizeof; i++) {
+		c->logentries[i] = sc_logentry_init();
+		if (!c->logentries[i])
+			goto fail;
+	}
+	if (!SC_CILI(c->queries) || !SC_CILI(c->logentries))
+		goto fail;
+	return c;
+fail:
+	/* _sizeof is only nonzero once the matching array exists; unfilled slots are NULL */
+	for (size_t i = 0; i < c->queries_sizeof; i++)
+		sc_query_free(c->queries[i]);
+	for (size_t i = 0; i < c->logentries_sizeof; i++)
+		if (c->logentries[i])
+			sc_logentry_free(c->logentries[i]);
+	free(c->queries);
+	free(c->logentries);
+	if (c->queries_lock) { /* non-NULL only after a successful pthread_rwlock_init */
+		pthread_rwlock_destroy(c->queries_lock);
+		free(c->queries_lock);
+	}
+	if (c->logentries_lock) {
+		pthread_rwlock_destroy(c->logentries_lock);
+		free(c->logentries_lock);
+	}
+	free(c);
+	return NULL;
+}
+/*
+ * Releases a cache: every preallocated query and log entry, both pointer arrays and both
+ * rwlocks. Returns 1 on success and -1 when c is NULL.
+ */
+int sc_cache_free(struct sc_cache * c) {
+	if (!c)
+		return -1;
+	if (c->queries)
+		for (size_t i = 0; i < c->queries_sizeof; i++)
+			sc_query_free(c->queries[i]); /* tolerates NULL slots */
+	free(c->queries);
+	if (c->logentries)
+		for (size_t i = 0; i < c->logentries_sizeof; i++)
+			if (c->logentries[i])
+				sc_logentry_free(c->logentries[i]);
+	free(c->logentries); /* the pointer array itself is owned by the cache as well */
+	#define SC_CFLD(name) do { if (name##_lock) { pthread_rwlock_destroy(name##_lock); free(name##_lock); } } while(0)
+	SC_CFLD(c->queries);
+	SC_CFLD(c->logentries);
+	free(c);
+	return 1;
+}
diff --git a/src/url.c b/src/url.c
new file mode 100644
index 0000000..a3a29e7
--- /dev/null
+++ b/src/url.c
@@ -0,0 +1,30 @@
+/*
+ * Percent-encodes the NUL terminated string i into o. ASCII letters and digits (as classified
+ * by isalnum) and the characters . _ - ~ are copied; every other byte becomes %XX with
+ * uppercase hex digits. The output is always NUL terminated.
+ *
+ * o must have at least strlen(i)*3+1 bytes of memory allocated. Always returns 1.
+ */
+int urlencode (char * o, char * i /* o must have at least strlen(i)*3+1 bytes of memory allocated */) {
+	static const char hexdigits[] = "0123456789ABCDEF";
+	size_t written = 0;
+	for (; *i; i++) {
+		unsigned char byte = (unsigned char) *i; /* plain char may be negative, which breaks isalnum and %02X */
+		if (isalnum(byte) || byte == '.' || byte == '_' || byte == '-' || byte == '~') {
+			o[written++] = (char) byte;
+		} else {
+			o[written++] = '%';
+			o[written++] = hexdigits[byte >> 4];
+			o[written++] = hexdigits[byte & 0x0F];
+		}
+	}
+	o[written] = '\0';
+	return 1;
+}
+/*
+ * Decodes the percent-encoded, NUL terminated string i into o. Every %XX sequence is replaced
+ * by the byte with that hexadecimal value; all other bytes are copied unchanged. The output is
+ * always NUL terminated.
+ *
+ * o must have at least strlen(i)+1 bytes memory allocated.
+ * Returns 1 on success and 0 when a '%' is not followed by two hexadecimal digits; o then
+ * holds what was decoded up to the malformed sequence.
+ */
+int urldecode (char * o, char * i /* o must have at least strlen(i)+1 bytes memory allocated */) {
+	size_t written = 0;
+	char buf[] = "00";
+	for (; *i; i++) {
+		if (*i != '%') {
+			o[written++] = *i;
+			continue;
+		}
+		/* i[2] is only looked at when i[1] is a hex digit, so the terminator is never passed */
+		if (!isxdigit((unsigned char) i[1]) || !isxdigit((unsigned char) i[2])) { /* malformed */
+			o[written] = '\0';
+			return 0;
+		}
+		buf[0] = i[1];
+		buf[1] = i[2];
+		o[written++] = (char) strtol(buf, NULL, 16);
+		i += 2;
+	}
+	o[written] = '\0';
+	return 1;
+}
diff --git a/test/nanohttp.c b/test/nanohttp.c
new file mode 100644
index 0000000..78d9fda
--- /dev/null
+++ b/test/nanohttp.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <libxml/nanohttp.h>
+#define UA "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)"
+/*
+ * Test client for libxml2's nanohttp: fetches the URL given as the first argument with the
+ * Nokia user agent and writes the response body to standard output.
+ * Returns 1 when no URL is given, the request cannot be started or writing fails, 0 otherwise.
+ */
+int main (int argc, char ** argv) {
+	if (argc < 1+1) {
+		fprintf(stderr, "%s <URL>\n", argc > 0 ? argv[0] : "nanohttp");
+		return 1;
+	}
+	char * contentType = NULL;
+	char * redir = NULL;
+	int readstatus = 0;
+	int rs = 0;
+	char buf[4096];
+	/* xmlNanoHTTPInit(); */
+	void * r = xmlNanoHTTPMethodRedir(argv[1], "GET", NULL /* inp */, &contentType, &redir, "User-Agent: " UA "\r\n", 0 /* inplen */);
+	if (!r) {
+		fprintf(stderr, "!r\n");
+		free(contentType);
+		free(redir);
+		return 1;
+	}
+	if (xmlNanoHTTPReturnCode(r) != 200)
+		fprintf(stderr, "xmlNanoHTTPReturnCode(r) != 200\n");
+	/* > 0, not > 1: a final chunk of exactly one byte is still data */
+	while (!rs && (readstatus = xmlNanoHTTPRead(r, buf, sizeof buf)) > 0) {
+		int done = 0;
+		while (done < readstatus) { /* write() may write less than requested */
+			ssize_t w = write(1, buf+done, (size_t) (readstatus-done));
+			if (w < 0) {
+				fprintf(stderr, "write failed\n");
+				rs = 1;
+				break;
+			}
+			done += (int) w;
+		}
+	}
+	if (readstatus == -1)
+		fprintf(stderr, "readstatus == -1\n");
+	xmlNanoHTTPClose(r);
+	free(contentType);
+	free(redir);
+	/* xmlNanoHTTPCleanup(); */
+	return rs;
+}