summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Anton Luka Šijanec <anton@sijanec.eu> 2024-01-05 16:44:26 +0100
committer Anton Luka Šijanec <anton@sijanec.eu> 2024-01-05 16:44:26 +0100
commit dd08d816dca127808b2343005ec8a728b6cb2a2a (patch)
tree c9b0bd578e38ddb42570471adff4e3f117177a20
parent 0.0.25 (diff)
download sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.tar
sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.tar.gz
sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.tar.bz2
sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.tar.lz
sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.tar.xz
sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.tar.zst
sear.c-dd08d816dca127808b2343005ec8a728b6cb2a2a.zip
-rw-r--r-- debian/changelog 6
-rw-r--r-- src/api.c 44
-rw-r--r-- src/httpd.c 74
-rw-r--r-- src/i18n.h 2
-rw-r--r-- src/lib.c 30
-rw-r--r-- src/main.c 1
-rw-r--r-- src/structs.c 6
7 files changed, 134 insertions, 29 deletions
diff --git a/debian/changelog b/debian/changelog
index 4de38b7..14d93f6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+sear.c (0.0.26-1) stable; urgency=low
+
+ * support for suggested queries and query redirects
+
+ -- Anton Luka Šijanec <anton@sijanec.eu> Fri, 05 Jan 2024 16:42:42 +0100
+
sear.c (0.0.25-1) stable; urgency=low
* updated git hostname
diff --git a/src/api.c b/src/api.c
index a652e3a..b0ef96d 100644
--- a/src/api.c
+++ b/src/api.c
@@ -127,6 +127,7 @@ int sc_fix_url (char ** h) { /* fixes a (result) URL in-place (removes tracking
enum sc_return sc_query_google (const char * s, /* breaking change: changed return type */
struct sc_cache * c,
struct sc_query * q,
+ char ** redirect, /* variable redirect will be set to a heap allocated string that must be freed by the caller if the upstream returned results for a different query. in that case the returned query object will be for a different search string! -- if NULL, request that upstream does not enable "results for" feature */
SC_OPT_TYPE opt) { /* check4cachedB4 */
/* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */
/* if query is not NULL, it MUST be initialized */
@@ -155,6 +156,9 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu
htmlDocPtr xmldoc = NULL;
char * txtdoc = NULL;
int qwasgiven = 0;
+ SC_LOG(SC_LOG_DEBUG, c, "%s called, redirect is %p", __func__, redirect);
+ if (redirect)
+ *redirect = NULL;
if (!s || !c) {
rs = SC_BADCALL;
goto rc;
@@ -166,7 +170,7 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu
qwasgiven++;
char * us = malloc(sizeof(char)*strlen(s)*3+1);
urlencode(us, s);
- txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100&ie=UTF-8%s", us, (opt&SC_OPT_IMAGE) ? "&tbm=isch" : "");
+ txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100&ie=UTF-8%s%s", us, (opt&SC_OPT_IMAGE) ? "&tbm=isch" : "", redirect ? "" : "&nfpr=1");
// fprintf(stdout, "%s\n", txtdoc);
free(us);
if (!txtdoc) {
@@ -178,6 +182,7 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu
rs = SC_CAPTCHA;
goto rc;
}
+ char * resultsforclass = sc_find_class(txtdoc, "{color:#1967d2}");
if (opt & SC_OPT_IMAGE) {
imageclass = sc_find_class(txtdoc, "{font-family:Roboto,Helvetica,Arial,sans-serif}");
if (!imageclass) {
@@ -292,11 +297,42 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu
SC_CUE(c, c->queries_lock);
goto rc;
}
+ q->string = realloc(q->string, sl+1);
+ strcpy(q->string, s);
+ char * xpathsugg = NULL;
+ if (resultsforclass) {
+ xpathsugg = malloc(512+strlen(resultsforclass));
+ sprintf(xpathsugg, "//a[contains(@class, '%s')]", resultsforclass);
+ xmlNodePtr suggnode = nthNodeX(xmldoc, xpathsugg, 0);
+ if (suggnode && xmlHasProp(suggnode, BAD_CAST "href")) {
+ char * href = (char *) xmlGetProp(suggnode, BAD_CAST "href");
+ char * content = (char *) xmlNodeGetContent(suggnode);
+ if (href && strstr(href, "&spell=1&"))
+ strcpy((q->suggested = realloc(q->suggested, strlen(content)+1)), content);
+ xmlFree(href);
+ xmlFree(content);
+ } else {
+ free(q->suggested);
+ q->suggested = NULL;
+ }
+ } else {
+ free(q->suggested);
+ q->suggested = NULL;
+ }
+ xmlNodePtr first = nthNodeX(xmldoc, xpathsugg, 1);
+ if (redirect && xpathsugg && q->suggested && xmlHasProp(first, BAD_CAST "href")) {
+ char * href = (char *) xmlGetProp(first, BAD_CAST "href");
+ if (href && strstr(href, "&nfpr=1&")) {
+ *redirect = q->suggested;
+ q->suggested = NULL;
+ q->string = realloc(q->string, strlen(*redirect)+1);
+ strcpy(q->string, *redirect);
+ }
+ xmlFree(href);
+ }
q->cache = c;
q->lookup_time = time(NULL);
- q->string = realloc(q->string, sl+1);
q->opt |= opt | SC_ENGINE_GOOGLE;
- strcpy(q->string, s);
if (!qwasgiven) {
SC_CWLE(c, c->queries_lock);
#ifdef SC_OLD_STORAGE
@@ -318,6 +354,8 @@ rc:
free(titleclass);
free(descclass);
free(imageclass);
+ free(resultsforclass);
free(xpath);
+ free(xpathsugg);
return rs;
}
diff --git a/src/httpd.c b/src/httpd.c
index 0171a11..c200fc6 100644
--- a/src/httpd.c
+++ b/src/httpd.c
@@ -3,7 +3,7 @@ char * sc_https2http (char * i) {
memmove(i+4, i+5, strlen(i)-3);
return i;
}
-char * sc_queryhtml (const struct sc_query * q, const char * add_form, size_t l) { /* remember to free returned string in the caller */ /* caller takes care of freeing */
+char * sc_queryhtml (const struct sc_query * q, const char * add_form, size_t l, const char * r) { /* remember to free returned string in the caller */ /* caller takes care of freeing */
size_t resultshtml_written = 0;
size_t resultshtml_sizeof = SC_ALLOC_CHUNK;
char * resultshtml = malloc(resultshtml_sizeof);
@@ -39,16 +39,48 @@ char * sc_queryhtml (const struct sc_query * q, const char * add_form, size_t l)
free(safebody);
free(safeurl);
}
-#define SC_HRS SC_I18N_NUMBER_OF_RESULTS ": %zu | " SC_I18N_QUERY_TIME ": %s"
char formatted_time[128];
struct tm tm;
localtime_r(&q->lookup_time, &tm);
strftime(formatted_time, 128, SC_I18N_DATETIME_FORMAT, &tm);
- char queryinfo[256];
- snprintf(queryinfo, 256, SC_HRS, q->results_length, formatted_time);
+ char * safesuggested = NULL;
+ if (q->suggested && strlen(q->suggested) < 4096) {
+ safesuggested = alloca(strlen(q->suggested)*3+256);
+ strcpy(safesuggested, "?q=");
+ urlencode(safesuggested+3, q->suggested);
+ }
+ if (!q->suggested && r && strlen(r) < 4096) {
+ safesuggested = alloca(strlen(r)*3+256);
+ strcpy(safesuggested, "?q=");
+ urlencode(safesuggested+3, r);
+ }
+ char * htmlsuggested = htmlspecialchars(q->suggested);
+ if (!htmlsuggested)
+ htmlsuggested = htmlspecialchars(r);
+ if (safesuggested) {
+ if (strstr(add_form, "name=h"))
+ strcat(safesuggested, "&h=h");
+ if (strstr(add_form, "name=l"))
+ sprintf(safesuggested+strlen(safesuggested), "&l=%d", atoi(strstr(add_form, "name=l")+8));
+ if (strstr(add_form, "name=h"))
+ strcat(safesuggested, "&h=h");
+ if (strstr(add_form, "name=e") || r)
+ strcat(safesuggested, "&e=e");
+ }
+ char * suggested = NULL;
+ if (htmlsuggested && safesuggested)
+ suggested = malloc(1+strlen(SC_I18N_DID_YOU_REALLY_MEAN)+strlen(SC_I18N_DID_YOU_MEAN)+strlen(safesuggested)+strlen(htmlsuggested));
+ if (suggested)
+ sprintf(suggested, "%s <a href='%s'>%s</a>", q->suggested ? SC_I18N_DID_YOU_MEAN : SC_I18N_DID_YOU_REALLY_MEAN, safesuggested, htmlsuggested);
+ char * queryinfo = malloc(256+strlen(suggested ? suggested : ""));
+ snprintf(queryinfo, 256, "%s%s" SC_I18N_NUMBER_OF_RESULTS ": %zu | " SC_I18N_QUERY_TIME ": %s"
+, suggested ? suggested : "", suggested ? " | " : "", q->results_length, formatted_time);
char * safequery = htmlspecialchars(q->string);
char * response = malloc(strlen((char *) sc_hp)+2*strlen(safequery)+strlen(queryinfo)+strlen(resultshtml)+strlen(add_form));
sprintf(response, (char *) sc_hp, safequery, safequery, add_form, queryinfo, resultshtml);
+ free(queryinfo);
+ free(suggested);
+ free(htmlsuggested);
free(safequery);
free(resultshtml);
return response;
@@ -130,6 +162,8 @@ enum MHD_Result sc_httpd (void * cls,
const char * l = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "l");
const char * h = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "h");
const char * f = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "f");
+ const char * e = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "e");
+ const char * r = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "r");
snprintf(add_form, 128, "%s%s%d%s", h ? "<input type=hidden name=h value=h />" : "",
l ? "<input type=hidden name=l value=" : "<!-- Odgovor na dokončno vprašanje o Življenju, Vesolju in sploh Vsem je ",
l ? atoi(l) : 42,
@@ -212,11 +246,29 @@ retry:
sc_https2http(LOCHORSE);
location = out;
} else
- response = sc_queryhtml(q, add_form, atoi(l ? l : "0")); /* MHD_create_response_from_buffer will free response (; */
+ response = sc_queryhtml(q, add_form, atoi(l ? l : "0"), r); /* MHD_create_response_from_buffer will free response (; */
SC_CUE(c, c->queries_lock);
} else {
SC_CUE(c, c->queries_lock);
- enum sc_return r = sc_query_google(query, c, NULL, opt);
+ char * redirect = NULL;
+ enum sc_return r = sc_query_google(query, c, NULL, e ? NULL : &redirect, opt);
+ if (redirect && strlen(query) < 4096 && strlen(redirect) < 4096) {
+ status_code = 307;
+ location = alloca(256+strlen(query)*3+strlen(redirect)*3);
+ sprintf(location, "?l=%d%s%s%s%s&q=", atoi(l ? l : ""), (opt & SC_OPT_IMAGE) ? "&i=i" : "", h ? "&h=h" : "", f ? "&f=f" : "", e ? "&e=e" : "");
+ urlencode(location+strlen(location), redirect);
+ free(redirect);
+ redirect = NULL;
+ strcat(location, "&r=");
+ urlencode(location+strlen(location), query);
+ content_type = "text/plain";
+ char * safeurl = htmlspecialchars(location);
+ free(response);
+ response = malloc(strlen(safeurl)*3+512);
+ sprintf(response, "<meta http-equiv=refresh content='0;URL=%s'><a id=a href='%s'>%s</a><script>a.click();</script>", safeurl, safeurl, safeurl);
+ free(safeurl);
+ goto sendresp;
+ }
if (already_retried++ || r == SC_CAPTCHA) {
status_code = 570+ABS(r);
if (r == SC_CAPTCHA && strlen(query) < 4096) {
@@ -224,15 +276,8 @@ retry:
status_code = 307;
location = alloca(strlen(getenv("SC_FALLBACK"))
+ 256 + strlen(query)*3);
- sprintf(location, "%sl=%d&q=", getenv("SC_FALLBACK"),
- atoi(l ? l : ""));
+ sprintf(location, "%sl=%d%s%s%s%s&q=", getenv("SC_FALLBACK"), atoi(l ? l : ""), (opt & SC_OPT_IMAGE) ? "&i=i" : "", h ? "&h=h" : "", f ? "&f=f" : "", e ? "&e=e" : "");
urlencode(location+strlen(location), query);
- if (opt & SC_OPT_IMAGE)
- strcat(location, "&i=i");
- if (h)
- strcat(location, "&h=h");
- if (f)
- strcat(location, "&f=f");
}
char * safequery = htmlspecialchars(query);
response = malloc(strlen((char*) sc_hp)
@@ -258,6 +303,7 @@ retry:
} else goto retry;
}
}
+ sendresp:
httpd_response = MHD_create_response_from_buffer (response_len ? response_len : strlen(response), (void *) response, mhdrmm);
MHD_add_response_header(httpd_response, "Content-Type", content_type);
if (status_code >= 300 && status_code <= 399)
diff --git a/src/i18n.h b/src/i18n.h
index d06e4f0..86d7b21 100644
--- a/src/i18n.h
+++ b/src/i18n.h
@@ -35,3 +35,5 @@
#define SC_I18N_SEARCH "išči"
#define SC_I18N_HORSESHOE "hitro"
#define SC_I18N_IMAGES "slike"
+#define SC_I18N_DID_YOU_REALLY_MEAN "preusmeril sem vas iz:"
+#define SC_I18N_DID_YOU_MEAN "predlagam iskanje:"
diff --git a/src/lib.c b/src/lib.c
index 2377ec9..62ddf84 100644
--- a/src/lib.c
+++ b/src/lib.c
@@ -46,18 +46,23 @@ void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * da
eachNode(nodes, f, data);
xmlXPathFreeObject(nodes);
}
-xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) {
- xmlXPathObjectPtr nodes = findNodesN(node, xpath);
- if (!nodes)
- return NULL;
- xmlNodeSetPtr nodeset = nodes->nodesetval;
- int size = nodeset->nodeNr;
- if (size <= n)
- return NULL;
- xmlNodePtr toreturn = (xmlNodePtr) nodeset->nodeTab[n];
- xmlXPathFreeObject(nodes);
- return toreturn;
+#define nthNodeFunctionGenerator(type, x) /* defines nthNodeX##x(node, xpath, n): returns the n-th (0-based) node that findNodes##x yields for xpath, or NULL if there is no such node */ \
+xmlNodePtr nthNodeX##x (type node, const char * xpath, int n) { \
+ xmlXPathObjectPtr nodes = findNodes##x(node, xpath); \
+ if (!nodes) \
+ return NULL; \
+ xmlNodeSetPtr nodeset = nodes->nodesetval; /* can be NULL for an empty or non-node-set result, EACHNODE guards against this as well */ \
+ int size = nodeset ? nodeset->nodeNr : 0; /* a missing node set counts as empty instead of being dereferenced */ \
+ if (n < 0 || size <= n) { /* reject indices outside nodeTab on either side */ \
+ xmlXPathFreeObject(nodes); \
+ return NULL; \
+ } \
+ xmlNodePtr toreturn = (xmlNodePtr) nodeset->nodeTab[n]; /* save the pointer before the result object is released */ \
+ xmlXPathFreeObject(nodes); \
+ return toreturn; \
+}
+nthNodeFunctionGenerator(htmlDocPtr,) // this one gets doc
+nthNodeFunctionGenerator(xmlNodePtr, N)
#define EACHNODE(node, nodes) /* you can instead use eachNodeX with anonymous function - no need to free and findnodes separatl */ \
for (int EACHNODE_i = 0; \
nodes ? nodes->nodesetval ? \
@@ -110,6 +115,9 @@ char * htmlspecialchars (const char * i) { /* remember to free the output */
case '"':
w += sprintf(o+w, "&quot;");
break;
+ case '\'':
+ w += sprintf(o+w, "&apos;");
+ break;
default:
o[w++] = *i;
break;
diff --git a/src/main.c b/src/main.c
index fdab429..a9857b6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,4 +1,5 @@
#define _GNU_SOURCE
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/src/structs.c b/src/structs.c
index 6deea3e..f23e52f 100644
--- a/src/structs.c
+++ b/src/structs.c
@@ -84,6 +84,8 @@ struct sc_query {
SC_IN_STRUCT_ARRAY(struct sc_result, results); /* yesfree */
char * string; /* yesfree - query string, stripped of any excess characters that should be excluded from indexing */
time_t lookup_time; /* time of last lookup */
+ char * suggested; /* yesfree - suggested search query (did you mean) */
+ bool redirect; /* true if client is encouraged to be redirected to suggested (showing results for) */
SC_OPT_TYPE opt; /* some options including engines */
};
struct sc_query * sc_query_init () {
@@ -95,6 +97,7 @@ struct sc_query * sc_query_init () {
q->results[i]->query = q;
}
q->string = NULL;
+ q->suggested = NULL;
q->opt = SC_OPT_INIT;
return q;
}
@@ -122,8 +125,9 @@ sc_query_free (
#endif
;
if (q->cache)
- SC_LOG(SC_LOG_DEBUG, q->cache, "sc_query_free: %s", q->string ? q->string : "NULL");
+ SC_LOG(SC_LOG_DEBUG, q->cache, "sc_query_free: %s (sugg: %s)", q->string ? q->string : "NULL", q->suggested ? q->suggested : "NULL");
free(q->string); /* if they were not alloced, they are NULL, if they were free'd somewhere else, they are also set to NULL */
+ free(q->suggested);
for (size_t i = 0; i < q->results_sizeof; i++)
sc_result_free(q->results[i]);
free(q->results);