summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: sijanec <anton@sijanec.eu>, 2021-04-04 22:52:39 +0200
committer: sijanec <anton@sijanec.eu>, 2021-04-04 22:52:39 +0200
commit: 4916ba77785633cd0d74a05802d0fc08764f40dc (patch)
tree: 747e53b5e64dbaaba71dbef5033424f2c5166067
parent: fixed dependency on stdin to pause (diff)
download: sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.tar
sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.tar.gz
sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.tar.bz2
sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.tar.lz
sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.tar.xz
sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.tar.zst
sear.c-4916ba77785633cd0d74a05802d0fc08764f40dc.zip
-rw-r--r-- src/api.c 40
-rw-r--r-- src/hp.html 15
-rw-r--r-- src/httpd.c 10
-rw-r--r-- src/structs.c 7
4 files changed, 60 insertions, 12 deletions
diff --git a/src/api.c b/src/api.c
index 6ad996e..76431cc 100644
--- a/src/api.c
+++ b/src/api.c
@@ -107,6 +107,22 @@ char * sc_find_class (char * haystack, const char * definition) { /* you must fr
toreturn[endofclass-class] = '\0';
return toreturn;
}
+/*
+ * sc_fix_url - strips google redirect wrappers from a (result) URL, in place.
+ *
+ * h: address of a pointer to a writable, NUL-terminated URL string.
+ *
+ * Two wrappers are recognised:
+ *   stage 1: a leading "/url?q=" (tracking url used by google results)
+ *   stage 2: "googleweblight.com/fp?u=" anywhere in the string ("light web" tracking url)
+ * In both cases the wrapped target is cut at the first '&' and run through urldecode(),
+ * so the resulting URL is shorter than or equal to the input.
+ *
+ * The cleaned URL is moved to the front of the buffer instead of advancing *h, so *h
+ * still points at the start of the caller's allocation after the call and can be
+ * released by the caller exactly as before the call.
+ *
+ * returns -1 when h or *h is NULL, 1 otherwise (also when nothing had to be fixed).
+ *
+ * strchrnul() is a GNU extension (needs _GNU_SOURCE).
+ */
+int sc_fix_url (char ** h) {
+	if (!h || !*h) /* stage 0: prevent accidental death */
+		return -1;
+	const char * const redirect = "/url?q=";
+	const char * const weblight = "googleweblight.com/fp?u=";
+	char * s = *h; /* start of the real URL inside the caller's buffer */
+	if (!strncmp(s, redirect, strlen(redirect))) { /* stage 1: url may be tracking url by google results */
+		s += strlen(redirect);
+		*strchrnul(s, '&') = '\0'; /* everything after the first '&' belongs to the wrapper, not the target */
+		urldecode(s, s);
+	}
+	char * c = strstr(s, weblight);
+	if (c) { /* stage 2: url may be "light web" tracking url by google results */
+		s = c+strlen(weblight); /* we could disable this with a cookie but meh, this is easier and _stateless_ */
+		*strchrnul(s, '&') = '\0';
+		urldecode(s, s);
+	}
+	if (s != *h) /* regions overlap, hence memmove and not memcpy/strcpy */
+		memmove(*h, s, strlen(s)+1);
+	/* TODO: be pedantic and remove utm_source and other tracking parameters */
+	return 1;
+}
struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q) { /* check for cached queries first! */
/* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */
/* if query is not NULL, it MUST be initialized */
@@ -146,7 +162,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
char * xpath = NULL;
char * descclass = NULL;
char * titleclass = NULL;
- char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us);
+ char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100", us);
// fprintf(stdout, "%s\n", txtdoc);
free(us);
if (!txtdoc) {
@@ -162,7 +178,8 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
goto rc;
}
#define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* result a */
-#define SC_GTXD "../..//table//span[@class='%s']"
+#define SC_GTXD /* description */ "../..//table//span[@class='%s']"
+#define SC_GTXB /* breadcrumbs */ ".//span[@class='%s']"
#define SC_GTR q->results[q->results_length-1]
xpath = malloc(strlen(titleclass)+strlen(SC_GTXF));
sprintf(xpath, SC_GTXF, titleclass); /* whenever starts with titleclas */
@@ -177,16 +194,18 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
if (node->type == XML_ELEMENT_NODE) {
xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
if (href) {
- char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href");
- if (!strncmp(hreflink, "/url?q=", strlen("/url?q="))) {
- hreflink = hreflink+strlen("/url?q=");
- *strchrnul(hreflink, '&') = '\0';
- urldecode(hreflink, hreflink);
- }
+ char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href"); /* fuck rules, I will rewrite it anyways <= hi future me */
+ sc_fix_url(&hreflink);
char * x = malloc(strlen(descclass)+strlen(SC_GTXD));
+ char * xbread = malloc(strlen(descclass)+strlen(SC_GTXB));
sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */);
+ sprintf(xbread, SC_GTXB, descclass /* remember, kids, GNU C is fucking legendary */);
xmlNodePtr descnode = nthNodeXN(node, x, 0);
+ if (!descnode) /* description may be above, see https://support.google.com/websearch?p=featured_snippets */
+ descnode = nthNodeXN(node, "../../div/div", 0);
+ xmlNodePtr breadnode = nthNodeXN(node, xbread, 0);
free(x);
+ free(xbread);
if (q->results_sizeof <= q->results_length)
SC_BIGGER_ARRAY(q->results, sc_result);
q->results_length++;
@@ -206,6 +225,11 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s
SC_GTR->desc = malloc(strlen(SC_I18N_NO_DESCRIPTION)+1);
strcpy(SC_GTR->desc, SC_I18N_NO_DESCRIPTION);
}
+ SC_GTR->breadcrumbs = (char *) xmlNodeGetContent(breadnode);
+ if (!SC_GTR->breadcrumbs) {
+ SC_GTR->breadcrumbs = malloc(strlen(SC_GTR->url)+1);
+ strcpy(SC_GTR->breadcrumbs, SC_GTR->url);
+ }
}
}
}
diff --git a/src/hp.html b/src/hp.html
index d2bc82f..47aff62 100644
--- a/src/hp.html
+++ b/src/hp.html
@@ -14,7 +14,7 @@
<style>
input[type=password], input[type=text], input[type=submit], input[type=button] {
height: 1cm;
- font-size: 18px;
+ font-size: large;
}
.result:hover {
background: var(--bgc2);
@@ -44,6 +44,19 @@
.SC_LOG_DEBUG {
color: magenta;
}
+ .breadcrumb {
+ color: var(--fgc2);
+ font-size: small; /* small is relative to parent (h4) size */
+ }
+ .result h4 {
+ white-space: nowrap;
+ overflow: hidden;
+ text-overflow: ellipsis;
+ margin-bottom: 0.314159265358em;
+ }
+ .result p {
+ margin-top: 0.314159265358em;
+ }
</style>
</head>
<body>
diff --git a/src/httpd.c b/src/httpd.c
index bf5c3d1..656ad92 100644
--- a/src/httpd.c
+++ b/src/httpd.c
@@ -9,13 +9,17 @@ char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string
string##_sizeof = (string##_written+wanted+1)*SC_REALLOC_K; \
string = realloc(string, string##_sizeof); \
}
-#define SC_HRF "<div class=result><h4><a href=\"%s\">%s</a></h4><p>%s</p></div>"
+#define SC_HRF "<div class=result id=result%lu><h4><a href=\"%s\" accesskey=%lu>%s</a> " \
+ "<span class=breadcrumb>%s</span></h4><p>%s</p></div>"
+#define SC_HRA i, safeurl, i, safetitle, safebreadcrumbs, safebody
char * safetitle = htmlspecialchars(q->results[i]->title);
char * safebody = htmlspecialchars(q->results[i]->desc);
char * safeurl = htmlspecialchars(q->results[i]->url);
- size_t ws = snprintf(NULL, 0, SC_HRF, safeurl, safetitle, safebody);
+ char * safebreadcrumbs = htmlspecialchars(q->results[i]->breadcrumbs);
+ size_t ws = snprintf(NULL, 0, SC_HRF, SC_HRA);
SC_HRC(resultshtml, ws);
- resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, safeurl, safetitle, safebody);
+ resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, SC_HRA);
+ free(safebreadcrumbs);
free(safetitle);
free(safebody);
free(safeurl);
diff --git a/src/structs.c b/src/structs.c
index b99f1eb..2d83f74 100644
--- a/src/structs.c
+++ b/src/structs.c
@@ -36,9 +36,14 @@ struct sc_result {
time_t date; /* some search engines like to extract a date from a website, store that here - not implemented */
unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implementd */
unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */
+ char * breadcrumbs; /* yesfree - google has nice breadcrumbs, when hovering over the URL requires too much time (: */
};
struct sc_result * sc_result_init () { /* allocates one sc_result and returns it; sc_result_free releases it together with its strings */
struct sc_result * r = calloc(1, sizeof(struct sc_result)); /* calloc: every member starts out zero-filled */
+ r->url = NULL; /* NOTE(review): r is not checked - a failed calloc would be dereferenced right here */
+ r->desc = NULL; /* string members are set to NULL explicitly, on top of the zero fill */
+ r->title = NULL;
+ r->breadcrumbs = NULL; /* NULL members keep the free() calls in sc_result_free harmless when a string was never assigned */
return r;
}
int sc_result_free (struct sc_result * r) {
@@ -47,6 +52,7 @@ int sc_result_free (struct sc_result * r) {
free(r->url);
free(r->desc);
free(r->title);
+ free(r->breadcrumbs);
free(r);
return 1;
}
@@ -65,6 +71,7 @@ struct sc_query * sc_query_init () {
q->results[i] = sc_result_init();
q->results[i]->query = q;
}
+ q->string = NULL;
return q;
}
int sc_query_free (struct sc_query * q) {