summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnton Luka Šijanec <anton@sijanec.eu>2021-12-26 19:52:31 +0100
committerAnton Luka Šijanec <anton@sijanec.eu>2021-12-26 19:52:31 +0100
commita10a8fb335e5a817e1a9add49ee179394eea67c4 (patch)
tree70ea95120f4390a6a5f32cda5a8bf1d45e4e0487
parent0.0.16, read debian/changelog (diff)
downloadsear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.tar
sear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.tar.gz
sear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.tar.bz2
sear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.tar.lz
sear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.tar.xz
sear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.tar.zst
sear.c-a10a8fb335e5a817e1a9add49ee179394eea67c4.zip
-rw-r--r--.gitignore1
-rw-r--r--README.md6
-rw-r--r--src/api.c18
-rw-r--r--src/httpd.c24
-rw-r--r--src/i18n.h10
-rw-r--r--src/log.c4
-rw-r--r--src/main.c5
-rw-r--r--src/structs.c91
-rw-r--r--test/tsearch.c15
9 files changed, 135 insertions, 39 deletions
diff --git a/.gitignore b/.gitignore
index 5e9d484..7620845 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ sear.c
tmp/
valgrind-out.txt
core
+a.out
diff --git a/README.md b/README.md
index 8c96e3f..adac2e9 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ service sear.c start
## requirements
* a POSIX system
-* GNU C library
+* GNU C library (uses `tdestroy(3)` if compiled without `SC_OLD_STORAGE`)
* GNU compiler collection (it's written in GNU C - it uses nested functions)
* GNU Make
* libxml2-dev (for the simple HTML/1.0 client and HTML parser)
@@ -32,12 +32,12 @@ make
## instructions
-* run the daemon - it starts listening on HTTP port 7327 (remember it by picturing phone keyboard buttons with letters SEAR (; )
+* run the daemon - by default it listens on HTTP port 7327 (remember it by picturing the phone keypad buttons that spell SEAR (; ); the port can be changed with the environment variable `SC_PORT`
* optional: create a reverse proxy for HTTPS
* navigate to [http://localhost:7327](http://localhost:7327) and do a couple of searches to see if everything works
* the horseshoe button redirects directly to the first result without wasting time on the results page. use if you feel lucky. (BP)
* the painting button performs a search for images. PRIVACY WARNING: images are loaded directly from servers (not from google)
-* program writes all logs to standard error
+* program writes all logs to standard error (and to `/logs.html` if compiled with `SC_LOGMEM`)
* setting the h parameter will rewrite links to HTTP from HTTPS
* setting the l parameter with a number will limit number of displayed links to that number.
diff --git a/src/api.c b/src/api.c
index 5ab4083..a366882 100644
--- a/src/api.c
+++ b/src/api.c
@@ -87,7 +87,7 @@ htmlDocPtr sc_capix (struct sc_cache * c, char * body, char * headers, int isfmt
char * sc_find_class (char * haystack, const char * definition) { /* you must free class after calling */
if (!haystack || !definition)
return NULL;
- char * class = strstr(haystack, definition);
+ char * class = strcasestr(haystack, definition);
if (!class)
return NULL;
int found = 0;
@@ -117,7 +117,7 @@ int sc_fix_url (char ** h) { /* fixes a (result) URL in-place (removes tracking
urldecode(*h, *h);
}
char * c = NULL;
- if ((c = strstr(*h, "googleweblight.com/fp?u="))) { /* stage 2: url may be "light web" tracking url by google results */
+ if ((c = strcasestr(*h, "googleweblight.com/fp?u="))) { /* stage 2: url may be "light web" tracking url by google results */
*h = c+strlen("googleweblight.com/fp?u="); /* we could disable this with a cookie but meh, this is easier and _stateless_ */
*strchrnul(*h, '&') = '\0';
urldecode(*h, *h);
@@ -156,6 +156,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
goto rc;
}
int qwasgiven = 0;
+ int sl = strlen(s);
if (!q)
q = sc_query_init();
else
@@ -214,6 +215,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
char * imgrefurl = NULL; /* easy, huh? */
SC_LOG(SC_LOG_DEBUG, c, "hreflink = %s", hreflink);
sscanf(hreflink, "/imgres?imgurl=%m[^&]&imgrefurl=%m[^&]", &imgurl, &imgrefurl);
+ xmlFree(hreflink);
if (!imgurl && !imgrefurl) {
SC_LOG(SC_LOG_ERROR, c, "!imgurl && !imgrefurl, txtdoc = %s", txtdoc);
/* rs = -6; */ /* we continue running not fail because of a single picture */
@@ -257,8 +259,9 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
if (hreflink) {
SC_GTR->url = malloc(strlen(hreflink)+1);
strcpy(SC_GTR->url, hreflink);
- xmlFree(orig_hreflink_for_free);
} else SC_GTR->url = NULL;
+ if (orig_hreflink_for_free)
+ xmlFree(orig_hreflink_for_free);
cp = (char *) xmlNodeGetContent(descnode);
if (cp) {
SC_GTR->desc = malloc(strlen(cp)+1);
@@ -285,17 +288,20 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
}
q->cache = c;
q->lookup_time = time(NULL);
- q->engines = SC_ENGINE_GOOGLE;
- q->string = realloc(q->string, strlen(s)+1);
- q->opt = opt;
+ q->string = realloc(q->string, sl+1);
+ q->opt |= opt | SC_ENGINE_GOOGLE;
strcpy(q->string, s);
if (!qwasgiven) {
SC_CWLE(c, c->queries_lock);
+#ifdef SC_OLD_STORAGE
if (c->queries_sizeof <= c->queries_length)
SC_BIGGER_ARRAY(c->queries, sc_query, 0);
c->queries_length++;
#define SC_GTQ c->queries[c->queries_length-1]
SC_GTQ = q;
+#else /* we don't detect here if query is already stored, but it should not be ... */
+ tsearch(q, &c->qrp, SC_COMPAR_CAST sc_query_compar);
+#endif
}
SC_CUE(c, c->queries_lock);
rc:
diff --git a/src/httpd.c b/src/httpd.c
index 514f57a..39604ef 100644
--- a/src/httpd.c
+++ b/src/httpd.c
@@ -3,7 +3,7 @@ char * sc_https2http (char * i) {
memmove(i+4, i+5, strlen(i)-3);
return i;
}
-char * sc_queryhtml (struct sc_query * q, const char * add_form, size_t l) { /* remember to free returned string in the caller */ /* caller takes care of freeing */
+char * sc_queryhtml (const struct sc_query * q, const char * add_form, size_t l) { /* remember to free returned string in the caller */ /* caller takes care of freeing */
size_t resultshtml_written = 0;
size_t resultshtml_sizeof = SC_ALLOC_CHUNK;
char * resultshtml = malloc(resultshtml_sizeof);
@@ -120,12 +120,12 @@ enum MHD_Result sc_httpd (void * cls,
char * location = "//git.sijanec.eu/sijanec/sear.c";
char * content_type = "text/html";
int status_code = MHD_HTTP_OK;
- SC_OPT_TYPE opt = 0;
+ SC_OPT_TYPE opt = SC_OPT_INIT;
if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "i"))
opt |= SC_OPT_IMAGE;
if (!host)
host = "";
- struct sc_query * q = NULL;
+ const struct sc_query * q = NULL;
char add_form[128];
const char * l = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "l");
const char * h = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "h");
@@ -172,18 +172,30 @@ enum MHD_Result sc_httpd (void * cls,
}
} else {
int already_retried = 0;
+ const struct sc_query query_to_find = {
+ .string = (char *) query,
+ .opt = opt
+ };
retry:
SC_CRLE(c, c->queries_lock);
+#ifdef SC_OLD_STORAGE
for (size_t i = 0; i < c->queries_length; i++)
- if (!strcmp(c->queries[i]->string, query) && c->queries[i]->opt == opt)
+ if (!sc_query_compar(c->queries[i], &query_to_find))
q = c->queries[i];
+#else /* tfind(3) also requires a pointer to the variable that holds rootp! */
+ const struct sc_query ** i_am_retarded = tfind(&query_to_find, &c->qrp, SC_COMPAR_CAST sc_query_compar);
+ q = i_am_retarded ? *i_am_retarded : NULL;
+#endif
if (q) {
const char * l = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "l");
- response = sc_queryhtml(q, add_form, atoi(l ? l : "0")); /* MHD_create_response_from_buffer will free response (; */
if (MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f") && q->results_length > 0) {
+ mhdrmm = MHD_RESPMEM_PERSISTENT; /* no need to generate HTML if */
+ content_type = "text/plain"; /* we have a feeling of luck! */
+ response = SC_I18N_HORSESHOE_RESPONSE;
status_code = 307;
location = q->results[0]->url ? q->results[0]->url : SC_I18N_NO_HREFLINK;
- }
+ } else
+ response = sc_queryhtml(q, add_form, atoi(l ? l : "0")); /* MHD_create_response_from_buffer will free response (; */
SC_CUE(c, c->queries_lock);
} else {
SC_CUE(c, c->queries_lock);
diff --git a/src/i18n.h b/src/i18n.h
index 8e97c33..5d7e3be 100644
--- a/src/i18n.h
+++ b/src/i18n.h
@@ -2,8 +2,15 @@
#define SC_I18N_NO_HREFLINK "/? ni hiperpovezave"
#define SC_I18N_NO_DESCRIPTION "ni opisa"
#define SC_I18N_HP_HEADING "dobrodošli na prvo stran <code>sear.c</code>"
+#ifdef SC_OLD_STORAGE
+#define SC_I18N_STORAGE "preprost O(n) iskalnik po seznamu. "
+#else
+#define SC_I18N_STORAGE "napreden <code>POSIX</code> <code>tsearch(3)</code> iskalnik po binarnem drevesu. "
+#endif
#define SC_I18N_HP_BODY "<code>sear.c</code> je program za anonimizacijo in predpomnenje rezultatov spletnih iskalnikov. " \
- "Za uporabo nekaj vnesite v iskalno vrstico zgoraj in pritisnite gumb za iskanje."
+ "Za uporabo nekaj vnesite v iskalno vrstico zgoraj in pritisnite gumb za iskanje. " \
+ "<br><small>Uporabljen algoritem za predpomnjenje rezultatov je " SC_I18N_STORAGE \
+ "Na izbiro algoritma med prevodom vplivate z zastavico <code>SC_OLD_STORAGE</code>.</small>"
#define SC_I18N_NUMBER_OF_RESULTS "število zadetkov"
#define SC_I18N_QUERY_TIME "čas poizvedbe"
#define SC_I18N_DATETIME_FORMAT "%c"
@@ -17,3 +24,4 @@
#define SC_I18N_LOGS_ERROR "napaka pri branju dnevnikov"
#define SC_I18N_LOGS_NOT_ENABLED "Zbiranje dnevniških zapisov v delovni pomnilnik ni omogočeno. <code>sear.c</code> prevedite z <code>make -e CC=\"cc -DSC_LOGMEM\"</code>; z nastavitvijo zastavice <code>SC_LOGMEM</code> omogočite pregled dnevniških zapisov znotraj aplikacije. Vselej pa se vsi dnevniški zapisi pišejo tudi na standardni izhod, kar se v primeru uporabe <code>sear.c</code> kot <code>systemd</code> storitve shranjuje v sistemske dnevnike."
#define SC_I18N_GIT_URL "//git.sijanec.eu/sijanec/sear.c"
+#define SC_I18N_HORSESHOE_RESPONSE "Če vidite to besedilo, vaš brskalnik ne podpira preusmeritev. V tem primeru ne uporabljajte argumenta f."
diff --git a/src/log.c b/src/log.c
index e7ee4f8..4c46804 100644
--- a/src/log.c
+++ b/src/log.c
@@ -1,5 +1,5 @@
-const char * sc_log_str (int t) {
- switch (t) {
+const char * sc_log_str (SC_OPT_TYPE t) {
+ switch (t & SC_LOG_MASK) {
case SC_LOG_ERROR:
return "SC_LOG_ERROR";
case SC_LOG_WARNING:
diff --git a/src/main.c b/src/main.c
index 6576fb8..32c7e96 100644
--- a/src/main.c
+++ b/src/main.c
@@ -21,6 +21,9 @@
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <microhttpd.h>
+#ifndef SC_OLD_STORAGE
+#include <search.h>
+#endif
#include <lib.c>
#include <url.c>
unsigned char sc_hp[] = { /* html page null terminated format string, from file src/hp.html */
@@ -39,7 +42,7 @@ char sc_securitytxt[] = "# This content information is provided by the developer
"Encryption: https://www.sijanec.eu/pgp-key.txt\n"
"Expires: Thu, 31 Dec 2021 18:37:07 -0800\n"
"Preferred-Languages: sl, en, de, hr\n";
-#define SC_HTTP_PORT 7327 /* SEAR on mobile keyboard */
+/* Port for the HTTP daemon: the value of the environment variable SC_PORT when it is a whole
+ * decimal number in 0..65535, otherwise the built-in default 7327. Garbage, trailing characters
+ * and out-of-range values fall back to the default instead of being passed through atoi(3). */
+static inline int sc_http_port (void) {
+	const char * env = getenv("SC_PORT"); /* looked up once per call */
+	char * end = NULL;
+	long port;
+	if (!env)
+		return 7327;
+	port = strtol(env, &end, 10);
+	if (end == env || *end != '\0' || port < 0 || port > 65535) /* not a number, trailing junk, or not a valid port */
+		return 7327;
+	return (int) port;
+}
+#define SC_HTTP_PORT sc_http_port() /* SEAR on mobile keyboard */
#define SC_HTTP_RBUFSIZE 4096 /* initial size of http read buffer, increasning by K */
#define SC_HTTP_USER_AGENT "Nokia WAP Gateway 4.1 CD1/ECD13_D/4.1.04)" /* so google and others sends a minimal response */
#define SC_HTTP_HEADERS "User-Agent: " SC_HTTP_USER_AGENT "\r\n"
diff --git a/src/structs.c b/src/structs.c
index 83d19b9..650eb88 100644
--- a/src/structs.c
+++ b/src/structs.c
@@ -4,22 +4,28 @@
#define SC_CRLE(c, name) (pthread_rwlock_rdlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_LOCKING " " #name " " SC_I18N_FAILED)||1) :0)
#define SC_CUE(c, name) (pthread_rwlock_unlock(name) ? (SC_LOG(SC_LOG_ERROR,c,SC_I18N_UNLOCKING " " #name " " SC_I18N_FAILED)||1):0)
#define SC_REALLOC_K 1.5 /* constant to dynamically realloc large arrays (new size = current size * K) */
-#define SC_ENGINE_GOOGLE (1 << 0)
/* _Atomic(size_t) sc_mem_max = 100e6; */ /* the really soft memory limit of the program: 100MB - NOT IMPLEMENTED */
#define SC_LOG(t, c, m, ...) sc_push_log(t, c, __func__, __FILE__, __LINE__, 0##__VA_OPT__(1), m __VA_OPT__(,) __VA_ARGS__)
-#define SC_LOG_ERROR (1 << 0)
-#define SC_LOG_WARNING (1 << 1)
-#define SC_LOG_INFO (1 << 2)
-#define SC_LOG_DEBUG (1 << 3)
#define SC_BIGGER_ARRAY(name, type, shallinit) do { \
name = realloc(name, sizeof(name[0])*ceil(name##_sizeof*SC_REALLOC_K)); \
for (size_t i = name##_sizeof; shallinit && (i < ceil(name##_sizeof*SC_REALLOC_K)); i++) \
name[i] = type##_init(); \
name##_sizeof = ceil(name##_sizeof*SC_REALLOC_K); /* ceil je ZELO pomemben, če je chunk 1 recimo */ \
} while (0);
-#define SC_OPT_TYPE unsigned char
-#define SC_OPT_IMAGE (1 << 0)
#define SC_STR(x) #x
+/* Bit flags sharing one SC_OPT_TYPE value: every enumerator is a distinct single bit, so engine,
+ * log-level and option flags can be OR-ed into the same variable and separated again with masks. */
+enum sc_opt {
+ SC_ENGINE_GOOGLE = 1 << 0,
+ SC_LOG_ERROR = 1 << 1,
+ SC_LOG_WARNING = 1 << 2,
+ SC_LOG_INFO = 1 << 3,
+ SC_LOG_DEBUG = 1 << 4,
+ SC_OPT_IMAGE = 1 << 5
+};
+/* union of the four log-level bits */
+#define SC_LOG_MASK (SC_LOG_ERROR | SC_LOG_WARNING | SC_LOG_INFO | SC_LOG_DEBUG)
+/* type name used for variables and struct members that hold the flags above */
+#define SC_OPT_TYPE enum sc_opt
+/* value with no flag set */
+#define SC_OPT_INIT 0
+/* bits of opt that sc_query_compar looks at; the engine bit is deliberately left out of the mask */
+#define SC_OPT_COMPAR /* mask */ (/* SC_ENGINE_GOOGLE | */ /* any engine is okay */ SC_OPT_IMAGE)
+/* cast to the comparator type taken by tsearch(3)/tfind(3); written in front of the comparator argument */
+#define SC_COMPAR_CAST (int (*)(const void *, const void *))
#ifdef SC_LOGMEM
struct sc_logentry {
unsigned char type; /* SC_LOG_ERROR, SC_LOG_WARNING, SC_LOG_INFO, SC_LOG_DEBUG */
@@ -32,6 +38,19 @@ struct sc_logentry {
int sc_logentry_free (struct sc_logentry * l); /* defined in log.c */
struct sc_logentry * sc_logentry_init (); /* defined in log.c */
#endif
+/* Cache shared by the request handlers: the stored queries, the rwlock pointer taken around
+ * access to them and, when built with SC_LOGMEM, the in-memory log entries with their own lock.
+ * The query container is chosen at compile time: an array with SC_OLD_STORAGE, otherwise a
+ * tsearch(3) tree reached through qrp. */
+struct sc_cache {
+#ifdef SC_OLD_STORAGE
+ SC_IN_STRUCT_ARRAY(struct sc_query, queries); /* yesfree */
+#else
+ void * qrp; /* queries root pointer-tsearch(3) */
+#endif
+ pthread_rwlock_t * queries_lock;
+#ifdef SC_LOGMEM
+ SC_IN_STRUCT_ARRAY(struct sc_logentry, logentries); /* yesfree */
+ pthread_rwlock_t * logentries_lock;
+#endif
+};
+/* Forward declaration of the function behind the SC_LOG macro, which passes: t = log type,
+ * c = cache, ca = __func__, f = __FILE__, l = __LINE__, isf = nonzero when variadic arguments
+ * follow, m = message. NOTE(review): the definition is outside this view - presumably log.c; confirm. */
+int sc_push_log (unsigned char t, struct sc_cache * c, const char * ca, char * f, size_t l, unsigned short int isf, char * m, ...);
struct sc_result {
struct sc_query * query; /* nofree - free from sc_cache */
char * url; /* yesfree - url of referer page when image searching */
@@ -65,8 +84,7 @@ struct sc_query {
SC_IN_STRUCT_ARRAY(struct sc_result, results); /* yesfree */
char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */
time_t lookup_time; /* time of last lookup */
- unsigned char engines; /* with what engine(s) was the query done - bitmask - if there are results from multiple engines */
- SC_OPT_TYPE opt; /* some options */
+ SC_OPT_TYPE opt; /* some options including engines */
};
struct sc_query * sc_query_init () {
struct sc_query * q = calloc(1, sizeof(struct sc_query));
@@ -77,31 +95,60 @@ struct sc_query * sc_query_init () {
q->results[i]->query = q;
}
q->string = NULL;
+ q->opt = SC_OPT_INIT;
return q;
}
-int sc_query_free (struct sc_query * q) {
+#ifdef SC_OLD_STORAGE
+int
+#else
+void
+#endif
+sc_query_free (
+#ifdef SC_OLD_STORAGE
+ struct sc_query
+#else
+ void
+#endif
+ * i) {
+ struct sc_query * q =
+#ifndef SC_OLD_STORAGE
+ (struct sc_query *)
+#endif
+ i;
if (!q)
- return -1;
+ return
+#ifdef SC_OLD_STORAGE
+ -1
+#endif
+ ;
+ if (q->cache)
+ SC_LOG(SC_LOG_DEBUG, q->cache, "sc_query_free: %s", q->string ? q->string : "NULL");
free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */
for (size_t i = 0; i < q->results_sizeof; i++)
sc_result_free(q->results[i]);
free(q->results);
free(q);
- return 1;
-}
-struct sc_cache {
- SC_IN_STRUCT_ARRAY(struct sc_query, queries); /* yesfree */
- pthread_rwlock_t * queries_lock;
-#ifdef SC_LOGMEM
- SC_IN_STRUCT_ARRAY(struct sc_logentry, logentries); /* yesfree */
- pthread_rwlock_t * logentries_lock;
+ return
+#ifdef SC_OLD_STORAGE
+ 1
#endif
-};
+ ;
+}
+/* Total order over queries, usable both directly and as a tsearch(3)/tfind(3) comparator.
+ * Returns <0, 0 or >0 when a sorts before, equal to or after b. Two queries are equal when
+ * their opt bits selected by SC_OPT_COMPAR match and their strings match. */
+int sc_query_compar (const struct sc_query * a, const struct sc_query * b) {
+	/* only the bits selected by SC_OPT_COMPAR take part; all other opt bits are ignored */
+	const unsigned int a_opt = a->opt & SC_OPT_COMPAR;
+	const unsigned int b_opt = b->opt & SC_OPT_COMPAR;
+	if (a_opt != b_opt) /* options first: an integer compare is cheaper than walking the strings */
+		return a_opt < b_opt ? -1 : 1;
+	if (!a->string || !b->string) /* strcmp(3) on NULL is undefined; a missing string sorts first */
+		return (a->string != NULL) - (b->string != NULL);
+	return strcmp(a->string, b->string);
+}
struct sc_cache * sc_cache_init() {
#define SC_CILI(name) do { name##_lock = malloc(sizeof(pthread_rwlock_t)); pthread_rwlock_init(name##_lock, NULL); } while (0)
struct sc_cache * c = calloc(1, sizeof(struct sc_cache));
+#ifdef SC_OLD_STORAGE
c->queries_sizeof = SC_ALLOC_CHUNK;
c->queries = calloc(c->queries_sizeof, sizeof(struct sc_query *));
+#endif
#ifdef SC_LOGMEM
c->logentries_sizeof = SC_ALLOC_CHUNK;
c->logentries = calloc(c->logentries_sizeof, sizeof(struct sc_logentry *));
@@ -116,10 +163,14 @@ int sc_cache_free(struct sc_cache * c) {
#define SC_CFLD(name) do { pthread_rwlock_destroy(name##_lock); free(name##_lock); } while(0)
if (!c)
return -1;
+#ifdef SC_OLD_STORAGE
fprintf(stderr, "c->queries_sizeof = %zu\n", c->queries_sizeof);
for (size_t i = 0; i < c->queries_sizeof; i++)
sc_query_free(c->queries[i]);
free(c->queries);
+#else
+ tdestroy(c->qrp, sc_query_free);
+#endif
#ifdef SC_LOGMEM
for (size_t i = 0; i < c->logentries_sizeof; i++)
sc_logentry_free(c->logentries[i]);
diff --git a/test/tsearch.c b/test/tsearch.c
new file mode 100644
index 0000000..45c4eef
--- /dev/null
+++ b/test/tsearch.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include <search.h>
+#include <string.h>
+/* tsearch(3) calls the comparator as int (*)(const void *, const void *), so it is defined with exactly that type */
+static int compar (const void * a, const void * b) {
+	return strcmp(a, b);
+}
+int main (void) {
+	void * root = NULL;
+	tsearch("key1", &root, compar);
+	tsearch("key1", &root, compar); /* same key again: the tree keeps the node that is already there */
+	tsearch("key2", &root, compar);
+	char ** found = tfind("key1", &root, compar); /* NULL when the key is not in the tree */
+	fprintf(stdout, "this should say key1: %s\n", found ? *found : "(not found)");
+	return found ? 0 : 1; /* non-zero exit status when the lookup failed */
+}