summaryrefslogtreecommitdiffstats
path: root/src/lib.c
diff options
context:
space:
mode:
authorsijanec <anton@sijanec.eu>2021-04-03 23:15:48 +0200
committersijanec <anton@sijanec.eu>2021-04-03 23:15:48 +0200
commitbbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4 (patch)
treef9960c7a43f7c0e1da6cb8e8656fcbda2129677a /src/lib.c
parentinitial commit (diff)
downloadsear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.gz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.bz2
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.lz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.xz
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.tar.zst
sear.c-bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4.zip
Diffstat (limited to '')
-rw-r--r--src/lib.c104
1 files changed, 94 insertions, 10 deletions
diff --git a/src/lib.c b/src/lib.c
index 2c3e34a..5c0576e 100644
--- a/src/lib.c
+++ b/src/lib.c
@@ -1,4 +1,4 @@
-static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
+htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
if (!b)
b = "";
htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
@@ -6,28 +6,112 @@ static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url *
htmlFreeParserCtxt(parser_context);
return document;
}
-static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
+/*
+ * Evaluates xpath_query against document.
+ *
+ * Returns the XPath result object when it holds a non-empty node set; the
+ * caller must release it with xmlXPathFreeObject(). Returns NULL when the
+ * evaluation fails or when the result is empty / has no node set.
+ */
+xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
+ xmlXPathFreeContext(xpath_ctx); /* the context is not needed once the expression has been evaluated */
+ if (!nodes || xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { /* nodes is NULL when evaluation failed */
+ xmlXPathFreeObject(nodes);
+ return NULL;
+ }
+ return nodes;
+}
+/*
+ * Evaluates xpath_query with node as the XPath context node, inside node's
+ * own document.
+ *
+ * Returns the XPath result object when it holds a non-empty node set; the
+ * caller must release it with xmlXPathFreeObject(). Returns NULL when node
+ * is NULL, when the evaluation fails, or when the result is empty / has no
+ * node set.
+ */
+xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
+ if (!node) /* node->doc is read below */
+ return NULL;
+ xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
+ xmlXPathSetContextNode(node, xpath_ctx);
+ xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
+ if (!nodes || xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { /* nodes is NULL when evaluation failed */
+ xmlXPathFreeContext(xpath_ctx);
+ xmlXPathFreeObject(nodes);
+ return NULL;
+ }
xmlXPathFreeContext(xpath_ctx);
return nodes;
}
-typedef void (*node_function_t)(xmlNodePtr node, void * data);
-static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+typedef void (*node_function_t) (xmlNodePtr node, void * data); /* callback: receives one node and the caller-supplied data pointer */
+/*
+ * Calls f(node, data) once for every entry of nodes->nodesetval, in nodeTab
+ * order. Does nothing when nodes is NULL, carries no node set, or f is NULL.
+ * nodes is not freed here; it stays owned by the caller.
+ */
+void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* you can instead use EACHNODE macro */
+ if (!nodes || !nodes->nodesetval || !f) /* findNodes returns NULL for empty results, so tolerate it here */
+ return;
xmlNodeSetPtr nodeset = nodes->nodesetval;
int i, size = nodeset->nodeNr;
for (i = 0; i < size; i++) {
xmlNodePtr cur;
- cur = (xmlNodePtr)nodeset->nodeTab[i];
+ cur = (xmlNodePtr) nodeset->nodeTab[i];
f(cur, data);
}
}
-void printLinkNode(xmlNodePtr node, void * data) {
+void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) { /* runs findNodes(doc, xpath), calls f(node, data) for each resulting node via eachNode, then frees the result
+ * f: callback invoked once per node; data: passed through to f unchanged */
+ xmlXPathObjectPtr nodes = findNodes(doc, xpath);
+ if (!nodes) /* findNodes returned NULL: nothing to iterate, nothing to free */
+ return;
+ eachNode(nodes, f, data);
+ xmlXPathFreeObject(nodes);
+}
+/*
+ * Returns the n-th (0-based) node matched by xpath when evaluated relative
+ * to node, or NULL when findNodesN() yields nothing or n is outside the
+ * result (negative, or >= the number of matches).
+ *
+ * Only the temporary XPath result object is freed here, on every path; the
+ * returned node itself is not freed by this function.
+ */
+xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) {
+ xmlXPathObjectPtr nodes = findNodesN(node, xpath);
+ if (!nodes)
+ return NULL;
+ xmlNodeSetPtr nodeset = nodes->nodesetval;
+ xmlNodePtr toreturn = NULL;
+ if (nodeset && n >= 0 && n < nodeset->nodeNr) /* reject negative and too-large indexes alike */
+ toreturn = (xmlNodePtr) nodeset->nodeTab[n];
+ xmlXPathFreeObject(nodes); /* also reached for an out-of-range n, so the result no longer leaks */
+ return toreturn;
+}
+/*
+ * EACHNODE(node, nodes) { ... }
+ * Loop header that assigns each entry of nodes->nodesetval->nodeTab to node
+ * in turn. nodes is an xmlXPathObjectPtr and may be NULL or carry no node
+ * set, in which case the body never runs. The loop also stops at the first
+ * NULL entry. The nodes argument is evaluated several times per iteration,
+ * so pass a plain variable, not an expression with side effects.
+ * You can instead use eachNodeX with an anonymous function - then there is
+ * no need to call findNodes and free the result separately.
+ */
+#define EACHNODE(node, nodes) \
+ for (int EACHNODE_i = 0; \
+ (nodes) && (nodes)->nodesetval \
+ && (EACHNODE_i < (nodes)->nodesetval->nodeNr) \
+ && ((node) = (xmlNodePtr) (nodes)->nodesetval->nodeTab[EACHNODE_i]); \
+ EACHNODE_i++)
+/* // this does not work
+#define EACHNODEX(node, target, xpath) \
+ xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
+ for (size_t EACHNODEX_i = 0; \
+ EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
+ ? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
+ && (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
+ : xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
+ : 0 : 0; \
+ EACHNODEX_i++)
+*/
+/*
+ * node_function_t callback that prints the text content of node to stdout
+ * when node is an element node; other node types are ignored.
+ * data is unused.
+ */
+void printNode (xmlNodePtr node, void * data) {
+ (void) data; /* unused; present only to match node_function_t */
if (node->type == XML_ELEMENT_NODE) {
- xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
- if (href) {
- printf("-> Link to '%s'\n", xmlGetProp(node, BAD_CAST "href"));
+ xmlChar * content = xmlNodeGetContent(node); /* allocated by libxml2, must be released with xmlFree */
+ printf("-> content: '%s'\n", content ? (const char *) content : ""); /* never hand NULL to %s */
+ xmlFree(content);
+ }
+}
+#define gnu_code_start /* saves the GCC diagnostic state, then ignores -Wpedantic until gnu_code_end */ \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wpedantic\"")
+#define gnu_code_end /* restores the GCC diagnostic state saved by gnu_code_start */ \
+ _Pragma ("GCC diagnostic pop")
+/* this is the definition of the anonymous function - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC
+ * lambda(l_ret_type, l_arguments, l_body) expands to a ({ ... }) statement expression that
+ * defines a function named l_anonymous_functions_name with the given return type, parenthesised
+ * argument list and braced body, and evaluates to the address of that function.
+ * NOTE(review): this relies on GNU C extensions (statement expression, function defined inside a
+ * block); presumably it is meant to be wrapped in gnu_code_start / gnu_code_end - confirm.
+ * NOTE(review): GCC's nested-function documentation restricts calling such a function through its
+ * address after the enclosing scope has exited - confirm callers only use the pointer immediately. */
+#define lambda(l_ret_type, l_arguments, l_body) \
+ ({ \
+ l_ret_type l_anonymous_functions_name l_arguments \
+ l_body \
+ &l_anonymous_functions_name; \
+ })
+/*
+ * Returns a newly allocated, NUL-terminated copy of i in which the
+ * HTML-significant characters are replaced by entities:
+ * & -> &amp;   < -> &lt;   > -> &gt;   " -> &quot;   ' -> &#39;
+ * All other bytes are copied unchanged.
+ *
+ * Returns NULL when i is NULL or when memory allocation fails (nothing is
+ * leaked in that case). The caller owns the result and must free() it.
+ */
+char * htmlspecialchars (const char * i) { /* remember to free the output */
+ if (!i)
+ return NULL;
+ size_t s = 128;
+ char * o = malloc(s);
+ if (!o)
+ return NULL;
+ size_t w = 0;
+ for (; *i; i++) {
+ if (s - w <= 10) { /* keep room for the longest entity (6 bytes) plus the terminator */
+ size_t ns = s + s / 2; /* grow by 1.5x using integer arithmetic only */
+ char * t = realloc(o, ns); /* temporary: on failure o is still valid and must be freed */
+ if (!t) {
+ free(o);
+ return NULL;
+ }
+ o = t;
+ s = ns;
+ }
+ switch (*i) {
+ case '&': /* escaped so that entity-like text in the input is not reinterpreted */
+ w += sprintf(o+w, "&amp;");
+ break;
+ case '<':
+ w += sprintf(o+w, "&lt;");
+ break;
+ case '>':
+ w += sprintf(o+w, "&gt;");
+ break;
+ case '"':
+ w += sprintf(o+w, "&quot;");
+ break;
+ case '\'':
+ w += sprintf(o+w, "&#39;");
+ break;
+ default:
+ o[w++] = *i;
+ break;
}
}
+ o[w++] = '\0';
+ return o;
}
-