summaryrefslogtreecommitdiffstats
path: root/src/lib.c
blob: 62ddf846a31ce67d4dc75338878a7b194d5d352f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*
 * Parses a NUL-terminated HTML string into a libxml2 document.
 *
 * d: HTML source text; NULL yields a NULL result instead of crashing in strlen().
 * b: base URL handed to the parser; NULL is replaced with the empty string.
 *
 * The parser runs with HTML_PARSE_RECOVER and with warnings and errors
 * silenced. Returns whatever htmlCtxtReadMemory() produced, or NULL when the
 * input is NULL or the parser context could not be created.
 */
htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
	if (!d)
		return NULL;
	if (!b)
		b = "";
	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
	if (!parser_context)
		return NULL;
	const int parse_options = HTML_PARSE_NOWARNING | HTML_PARSE_NOERROR | HTML_PARSE_RECOVER;
	/* NOTE(review): the API takes the length as int, so inputs longer than INT_MAX bytes are not supported. */
	htmlDocPtr document = htmlCtxtReadMemory(parser_context, d, (int) strlen(d), b, NULL /* encoding */, parse_options);
	htmlFreeParserCtxt(parser_context);
	return document;
}
/*
 * Evaluates xpath_query against the whole document.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller must release it with xmlXPathFreeObject(). Returns NULL when the
 * context cannot be created, when evaluation fails (for example an invalid
 * expression), or when the resulting node set is empty.
 */
xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
	if (!xpath_ctx)
		return NULL;
	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
	/* the context is only needed for the evaluation itself */
	xmlXPathFreeContext(xpath_ctx);
	if (!nodes) /* evaluation failed; nothing to inspect or free */
		return NULL;
	if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		return NULL;
	}
	return nodes;
}
/*
 * Evaluates xpath_query with `node` as the XPath context node, so relative
 * expressions are resolved from that node.
 *
 * Returns the XPath result object when it holds a non-empty node set; the
 * caller must release it with xmlXPathFreeObject(). Returns NULL when node is
 * NULL, when the context cannot be created, when evaluation fails, or when
 * the resulting node set is empty.
 */
xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
	if (!node)
		return NULL;
	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
	if (!xpath_ctx)
		return NULL;
	/* xmlXPathNodeEval sets `node` as the context node itself, so no separate xmlXPathSetContextNode call is needed */
	xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
	xmlXPathFreeContext(xpath_ctx);
	if (!nodes) /* evaluation failed; nothing to inspect or free */
		return NULL;
	if (xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
		xmlXPathFreeObject(nodes);
		return NULL;
	}
	return nodes;
}
/* Callback invoked once per node; `data` is the opaque pointer given to the iterating function. */
typedef void (*node_function_t) (xmlNodePtr node, void * data);

/*
 * Calls f(node, data) for every entry of the node set held by `nodes`, in
 * node-set order. Does nothing when `nodes`, its node set, or `f` is NULL.
 * The result object is not freed here. The EACHNODE macro is an alternative
 * to this function.
 */
void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) {
	if (!nodes || !f)
		return;
	xmlNodeSetPtr nodeset = nodes->nodesetval;
	if (!nodeset)
		return;
	for (int i = 0; i < nodeset->nodeNr; i++)
		f((xmlNodePtr) nodeset->nodeTab[i], data);
}
/*
 * Runs `xpath` against `doc` via findNodes() and calls f(node, data) for each
 * matching node. The intermediate result object is freed before returning.
 * When findNodes() returns NULL the callback is never invoked.
 */
void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) {
	xmlXPathObjectPtr matches = findNodes(doc, xpath);
	if (matches) {
		eachNode(matches, f, data);
		xmlXPathFreeObject(matches);
	}
}
/*
 * Generates a function nthNodeX<x>(node, xpath, n) that evaluates `xpath`
 * through findNodes<x>() and returns the n-th (zero-based) node of the result,
 * or NULL when n is negative, when nothing matched, or when fewer than n+1
 * nodes matched. The XPath result object is freed before returning; only the
 * node pointer taken from it is handed back.
 *
 * type: type of the first parameter (what findNodes<x> expects)
 * x:    suffix appended to both nthNodeX and findNodes (may be empty)
 */
#define nthNodeFunctionGenerator(type, x) \
xmlNodePtr nthNodeX##x (type node, const char * xpath, int n) { \
	if (n < 0) /* a negative index would read before the start of nodeTab */ \
		return NULL; \
	xmlXPathObjectPtr nodes = findNodes##x(node, xpath); \
	if (!nodes) \
		return NULL; \
	xmlNodeSetPtr nodeset = nodes->nodesetval; \
	xmlNodePtr toreturn = NULL; \
	if (nodeset && n < nodeset->nodeNr) \
		toreturn = (xmlNodePtr) nodeset->nodeTab[n]; \
	xmlXPathFreeObject(nodes); \
	return toreturn; \
}
nthNodeFunctionGenerator(htmlDocPtr,) // this one gets doc: nthNodeX(htmlDocPtr, ...) built on findNodes
nthNodeFunctionGenerator(xmlNodePtr, N) // nthNodeXN(xmlNodePtr, ...) built on findNodesN
/*
 * for-loop header that iterates over the node set of an XPath result object:
 *
 *     EACHNODE(node, nodes) { ... use node ... }
 *
 * node:  an assignable xmlNodePtr lvalue; receives the current entry
 * nodes: xmlXPathObjectPtr expression; it is evaluated several times per
 *        iteration, so it must not have side effects
 *
 * The loop body is skipped when `nodes` or its node set is NULL, and the loop
 * also stops at the first NULL entry in nodeTab. The result object is not
 * freed by the macro. As an alternative, eachNodeX with an anonymous function
 * needs no separate findNodes call and no separate free.
 */
#define EACHNODE(node, nodes) \
	for (int EACHNODE_i = 0; \
			(nodes) && (nodes)->nodesetval \
				&& (EACHNODE_i < (nodes)->nodesetval->nodeNr) \
				&& ((node) = (xmlNodePtr) (nodes)->nodesetval->nodeTab[EACHNODE_i]); \
			EACHNODE_i++)
/* NOTE: the macro below does not work; it is kept commented out.
#define EACHNODEX(node, target, xpath) \
	xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
		for (size_t EACHNODEX_i = 0; \
				EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
					? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
						&& (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
					: xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
				: 0 : 0; \
				EACHNODEX_i++)
*/
/*
 * node_function_t callback that prints the text content of an element node to
 * stdout as "-> content: '<text>'". Nodes of any other type, and a NULL node,
 * are ignored. `data` is unused.
 */
void printNode (xmlNodePtr node, void * data) {
	(void) data; /* unused; present only to match node_function_t */
	if (!node || node->type != XML_ELEMENT_NODE)
		return;
	/* xmlNodeGetContent returns a newly allocated string that the caller owns */
	xmlChar * content = xmlNodeGetContent(node);
	printf("-> content: '%s'\n", content ? (char *) content : "(null)");
	if (content)
		xmlFree(content);
}
/*
 * gnu_code_start saves the current GCC diagnostic state and then disables the
 * -Wpedantic and -Wformat= warnings; gnu_code_end restores the saved state.
 * Place the pair around code that would otherwise trigger those warnings
 * (presumably code using GNU extensions such as the lambda macro - verify
 * against callers).
 */
#define gnu_code_start \
	_Pragma ("GCC diagnostic push") \
	_Pragma ("GCC diagnostic ignored \"-Wpedantic\"") \
	_Pragma ("GCC diagnostic ignored \"-Wformat=\"")
#define gnu_code_end \
	_Pragma ("GCC diagnostic pop") 
/*
 * Definition of the anonymous function helper.
 * Source: https://en.wikipedia.org/wiki/Anonymous_function#GCC
 *
 * Expands to a statement expression ({ ... }) that defines a function named
 * l_anonymous_functions_name inside the expression and evaluates to its
 * address.
 *
 * l_ret_type:  return type of the generated function
 * l_arguments: parenthesized parameter list, e.g. (xmlNodePtr node, void * data)
 * l_body:      brace-enclosed function body
 *
 * NOTE(review): this relies on GNU C extensions (statement expressions and
 * nested functions); the resulting pointer is presumably only safe to use
 * while the enclosing function is still running - confirm against the GCC
 * manual.
 */
#define lambda(l_ret_type, l_arguments, l_body)        \
	({                                                   \
	 l_ret_type l_anonymous_functions_name l_arguments   \
	 l_body                                              \
	 &l_anonymous_functions_name;                        \
	 })
/*
 * Returns a newly allocated copy of `i` in which '<' is replaced by "&lt;",
 * '"' by "&quot;" and '\'' by "&apos;". Every other byte (including '&' and
 * '>') is copied unchanged.
 *
 * Returns NULL when `i` is NULL or when memory allocation fails. Remember to
 * free() the output.
 */
char * htmlspecialchars (const char * i) {
	if (!i)
		return NULL;
	size_t s = 128; /* current capacity of o */
	char * o = malloc(s);
	if (!o)
		return NULL;
	size_t w = 0; /* number of bytes written so far */
	for (; *i; i++) {
		/* keep more than 10 bytes free: the longest entity is 6 bytes, plus the final NUL */
		if (s - w <= 10) {
			size_t grown = s + s / 2;
			char * bigger = grown > s ? realloc(o, grown) : NULL;
			if (!bigger) {
				/* realloc leaves the old buffer intact on failure; release it */
				free(o);
				return NULL;
			}
			o = bigger;
			s = grown;
		}
		const char * entity = NULL;
		switch (*i) {
			case '<':
				entity = "&lt;";
				break;
			case '"':
				entity = "&quot;";
				break;
			case '\'':
				entity = "&apos;";
				break;
			default:
				break;
		}
		if (entity) {
			while (*entity)
				o[w++] = *entity++;
		} else {
			o[w++] = *i;
		}
	}
	o[w] = '\0';
	return o;
}