summaryrefslogtreecommitdiffstats
path: root/src/lib.c
blob: 5c0576ec3928abc0fb53005d0287bcce616c89f0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*
 * Parses the NUL-terminated HTML text d into a libxml2 document tree.
 *
 * d: HTML source; NULL yields NULL.
 * b: base URL handed to the parser; NULL is treated as "".
 *
 * Parser warnings and errors are suppressed and recovery mode is enabled.
 * Returns the parsed document, or NULL when d is NULL, d is too long for the
 * parser's int size argument, the parser context cannot be allocated, or
 * parsing fails. The caller owns the returned document (libxml2 releases
 * documents with xmlFreeDoc).
 */
htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
	if (!d)
		return NULL;
	if (!b)
		b = "";
	/* htmlCtxtReadMemory takes the buffer size as int - refuse lengths that do not survive the conversion */
	size_t length = strlen(d);
	int size = (int) length;
	if (size < 0 || (size_t) size != length)
		return NULL;
	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
	if (!parser_context)
		return NULL;
	htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, size, b, NULL /* encoding */, HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR  | HTML_PARSE_RECOVER);
	htmlFreeParserCtxt(parser_context);
	return document;
}
/*
 * Evaluates xpath_query against the whole document.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller must release it with xmlXPathFreeObject. Returns NULL when an
 * argument is NULL, the XPath context cannot be created, the expression
 * fails to evaluate, or the result contains no nodes.
 */
xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
	if (!document || !xpath_query)
		return NULL;
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
	if (!xpath_ctx)
		return NULL;
	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
	/* the context is only needed during evaluation */
	xmlXPathFreeContext(xpath_ctx);
	if (!nodes) /* evaluation failed, e.g. malformed expression */
		return NULL;
	if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		return NULL;
	}
	return nodes;
}
/*
 * Evaluates xpath_query with node as the XPath context node, so relative
 * expressions are resolved from node.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller must release it with xmlXPathFreeObject. Returns NULL when an
 * argument is NULL, the XPath context cannot be created, the expression
 * fails to evaluate, or the result contains no nodes.
 */
xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
	if (!node || !xpath_query)
		return NULL;
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
	if (!xpath_ctx)
		return NULL;
	/* xmlXPathNodeEval installs node as the context node itself, no separate xmlXPathSetContextNode call is needed */
	xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
	xmlXPathFreeContext(xpath_ctx);
	if (!nodes) /* evaluation failed, e.g. malformed expression */
		return NULL;
	if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		return NULL;
	}
	return nodes;
}
/* Callback type used by eachNode/eachNodeX: receives one node plus the opaque data pointer the caller supplied. */
typedef void (*node_function_t) (xmlNodePtr node, void * data);
/*
 * Calls f(node, data) once for every node in the node set of nodes, in
 * node-set order.
 *
 * Does nothing when nodes is NULL, when it carries no node set, or when f is
 * NULL. The XPath object is not freed here.
 */
void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) { /* you can instead use EACHNODE macro */
	if (!nodes || !nodes->nodesetval || !f)
		return;
	xmlNodeSetPtr nodeset = nodes->nodesetval;
	for (int i = 0; i < nodeset->nodeNr; i++)
		f((xmlNodePtr) nodeset->nodeTab[i], data);
}
/*
 * Convenience wrapper: looks up xpath in doc via findNodes, invokes
 * f(node, data) for every match via eachNode, then releases the XPath result.
 * When findNodes yields nothing, f is never called.
 */
void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) {
	xmlXPathObjectPtr matches = findNodes(doc, xpath);
	if (matches != NULL) {
		eachNode(matches, f, data);
		xmlXPathFreeObject(matches);
	}
}
/*
 * Returns the n-th (0-based) node matched by xpath, evaluated relative to
 * node, or NULL when there is no such match or n is negative.
 *
 * The temporary XPath result is always released before returning.
 * NOTE(review): the returned pointer is taken from the result's node table;
 * for ordinary tree nodes it presumably stays valid as long as the document
 * does - confirm for namespace-node results.
 */
xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) {
	xmlXPathObjectPtr nodes = findNodesN(node, xpath);
	if (!nodes)
		return NULL;
	xmlNodeSetPtr nodeset = nodes->nodesetval;
	xmlNodePtr toreturn = NULL;
	/* bounds-check both ends; out-of-range requests fall through to the common cleanup */
	if (nodeset && n >= 0 && n < nodeset->nodeNr)
		toreturn = (xmlNodePtr) nodeset->nodeTab[n];
	xmlXPathFreeObject(nodes);
	return toreturn;
}
/*
 * EACHNODE(node, nodes) statement
 *
 * Loop header that assigns each entry of nodes->nodesetval->nodeTab to node
 * in turn and runs the following statement for it. The loop is skipped when
 * nodes or its node set is NULL, and stops early at a NULL table entry.
 * The nodes argument is evaluated several times per iteration, so pass a
 * side-effect-free expression. The caller still has to obtain and free the
 * XPath object; alternatively use eachNodeX with an anonymous function, which
 * needs no separate findNodes call and no manual free.
 */
#define EACHNODE(node, nodes) \
	for (int EACHNODE_i = 0; \
			(nodes) ? (nodes)->nodesetval ? \
				((EACHNODE_i < (nodes)->nodesetval->nodeNr) && ((node) = (xmlNodePtr)(nodes)->nodesetval->nodeTab[EACHNODE_i])) \
			: 0 : 0; \
			EACHNODE_i++)
/* // this does not work
#define EACHNODEX(node, target, xpath) \
	xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
		for (size_t EACHNODEX_i = 0; \
				EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
					? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
						&& (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
					: xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
				: 0 : 0; \
				EACHNODEX_i++)
*/
/*
 * node_function_t callback that prints the text content of element nodes to
 * stdout as "-> content: '...'". Other node types and NULL are ignored.
 * data is unused.
 */
void printNode (xmlNodePtr node, void * data) {
	(void) data; /* unused, present only to match node_function_t */
	if (!node || node->type != XML_ELEMENT_NODE)
		return;
	/* xmlNodeGetContent returns a freshly allocated string (or NULL) that must be released with xmlFree */
	xmlChar * content = xmlNodeGetContent(node);
	printf("-> content: '%s'\n", content ? (const char *) content : "");
	if (content)
		xmlFree(content);
}
/*
 * gnu_code_start / gnu_code_end bracket a region of code: the first saves the
 * current GCC diagnostic state and turns off -Wpedantic, the second restores
 * the saved state. Always use them as a pair.
 */
#define gnu_code_start \
	_Pragma ("GCC diagnostic push") \
	_Pragma ("GCC diagnostic ignored \"-Wpedantic\"")
#define gnu_code_end \
	_Pragma ("GCC diagnostic pop") 
/*
 * Anonymous function helper - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC
 *
 * lambda(l_ret_type, l_arguments, l_body) expands to a ({ ... }) statement
 * expression that defines a function named l_anonymous_functions_name with the
 * given return type, parenthesized parameter list and braced body, and
 * evaluates to the address of that function.
 * NOTE(review): this relies on GNU C extensions, presumably the reason for the
 * gnu_code_start/gnu_code_end pedantic-warning guards - confirm at call sites.
 */
#define lambda(l_ret_type, l_arguments, l_body)        \
	({                                                   \
	 l_ret_type l_anonymous_functions_name l_arguments   \
	 l_body                                              \
	 &l_anonymous_functions_name;                        \
	 })
/*
 * Returns a newly allocated copy of i in which '<' is replaced by "&lt;" and
 * '"' by "&quot;"; every other byte is copied unchanged.
 *
 * i: NUL-terminated input; NULL is treated as the empty string.
 *
 * Returns a malloc()ed NUL-terminated string the caller must free(), or NULL
 * when memory cannot be allocated.
 * NOTE(review): '&' and '>' are passed through unescaped, as before - confirm
 * whether callers need them escaped as well.
 */
char * htmlspecialchars (const char * i) { /* remember to free the output */
	if (!i)
		i = "";
	size_t s = 128; /* current capacity of o */
	char * o = malloc(s);
	if (!o)
		return NULL;
	size_t w = 0; /* number of bytes written so far */
	for (; *i; i++) {
		const char * entity = NULL;
		switch (*i) {
			case '<':
				entity = "&lt;";
				break;
			case '"':
				entity = "&quot;";
				break;
			default:
				break;
		}
		size_t need = entity ? strlen(entity) : 1;
		/* keep at least one spare byte after this write for the terminating NUL */
		if (s - w <= need) {
			size_t grown = s + s / 2;
			if (grown <= s) { /* size_t wrapped around */
				free(o);
				return NULL;
			}
			char * bigger = realloc(o, grown);
			if (!bigger) { /* o is still valid on failure, so release it */
				free(o);
				return NULL;
			}
			o = bigger;
			s = grown;
		}
		if (entity) {
			for (const char * e = entity; *e; e++)
				o[w++] = *e;
		} else {
			o[w++] = *i;
		}
	}
	o[w] = '\0';
	return o;
}