#include #include #include #include #include #include #include int main (int argc, char ** argv) { xmlInitParser(); htmlDocPtr xmldoc; char * txtdoc; struct stat s; int fd = open(argv[1], O_RDONLY); htmlParserCtxtPtr c; stat(argv[1], &s); txtdoc = mmap(NULL, s.st_size, PROT_READ, MAP_PRIVATE, fd, 0); xmlInitParser(); c = htmlNewParserCtxt(); xmldoc = htmlCtxtReadMemory(c, txtdoc, strlen(txtdoc), "", NULL, HTML_PARSE_RECOVER); /* by the way: why/how/when does libxml2 use networking when HTML_PARSE_NOT is not specified? */ htmlFreeParserCtxt(c); xmlFreeDoc(xmldoc); close(fd); munmap(txtdoc, s.st_size); xmlCleanupParser(); return 0; }