htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) { if (!b) b = ""; htmlParserCtxtPtr parser_context = htmlNewParserCtxt(); htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, strlen(d), b, NULL /* encoding */, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER); htmlFreeParserCtxt(parser_context); return document; } xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) { xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document); xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx); if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { xmlXPathFreeContext(xpath_ctx); xmlXPathFreeObject(nodes); return NULL; } xmlXPathFreeContext(xpath_ctx); return nodes; } xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) { xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc); xmlXPathSetContextNode(node, xpath_ctx); xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx); if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) { xmlXPathFreeContext(xpath_ctx); xmlXPathFreeObject(nodes); return NULL; } xmlXPathFreeContext(xpath_ctx); return nodes; } typedef void (*node_function_t) (xmlNodePtr node, void * data); void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* you can instead use EACHNODE macro */ xmlNodeSetPtr nodeset = nodes->nodesetval; int i, size = nodeset->nodeNr; for (i = 0; i < size; i++) { xmlNodePtr cur; cur = (xmlNodePtr) nodeset->nodeTab[i]; f(cur, data); } } void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) { xmlXPathObjectPtr nodes = findNodes(doc, xpath); if (!nodes) return; eachNode(nodes, f, data); xmlXPathFreeObject(nodes); } #define nthNodeFunctionGenerator(type, x) \ xmlNodePtr nthNodeX##x (type node, const char * xpath, int n) { \ xmlXPathObjectPtr nodes = findNodes##x(node, xpath); \ if (!nodes) \ return NULL; \ xmlNodeSetPtr nodeset = nodes->nodesetval; \ int size = nodeset->nodeNr; \ if (size <= n) { \ xmlXPathFreeObject(nodes); \ return NULL; \ } \ xmlNodePtr toreturn = (xmlNodePtr) nodeset->nodeTab[n]; \ xmlXPathFreeObject(nodes); \ return toreturn; \ } nthNodeFunctionGenerator(htmlDocPtr,) // this one gets doc nthNodeFunctionGenerator(xmlNodePtr, N) #define EACHNODE(node, nodes) /* you can instead use eachNodeX with anonymous function - no need to free and findnodes separatl */ \ for (int EACHNODE_i = 0; \ nodes ? nodes->nodesetval ? \ ((EACHNODE_i < nodes->nodesetval->nodeNr) && (node = (xmlNodePtr)nodes->nodesetval->nodeTab[EACHNODE_i])) \ : 0 : 0; \ EACHNODE_i++) /* // to ne dela #define EACHNODEX(node, target, xpath) \ xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \ for (size_t EACHNODEX_i = 0; \ EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \ ? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \ && (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \ : xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \ : 0 : 0; \ EACHNODEX_i++) */ void printNode (xmlNodePtr node, void * data) { if (data){} if (node->type == XML_ELEMENT_NODE) { printf("-> content: '%s'\n", (char *) xmlNodeGetContent(node)); } } #define gnu_code_start \ _Pragma ("GCC diagnostic push") \ _Pragma ("GCC diagnostic ignored \"-Wpedantic\"") \ _Pragma ("GCC diagnostic ignored \"-Wformat=\"") #define gnu_code_end \ _Pragma ("GCC diagnostic pop") /* this is the definition of the anonymous function - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC */ #define lambda(l_ret_type, l_arguments, l_body) \ ({ \ l_ret_type l_anonymous_functions_name l_arguments \ l_body \ &l_anonymous_functions_name; \ }) char * htmlspecialchars (const char * i) { /* remember to free the output */ if (!i) return NULL; size_t s = 128; char * o = malloc(s); size_t w = 0; for (; *i; i++) { if (s - w <= 10) o = realloc(o, (s *= 1.5)); switch (*i) { case '<': w += sprintf(o+w, "<"); break; case '"': w += sprintf(o+w, """); break; case '\'': w += sprintf(o+w, "'"); break; default: o[w++] = *i; break; } } o[w++] = '\0'; return o; }