diff options
Diffstat (limited to '')
-rw-r--r-- | MemDumpAnalysis/expat/xmltok_impl.c | 1800 |
1 files changed, 0 insertions, 1800 deletions
diff --git a/MemDumpAnalysis/expat/xmltok_impl.c b/MemDumpAnalysis/expat/xmltok_impl.c deleted file mode 100644 index 5fa578a43..000000000 --- a/MemDumpAnalysis/expat/xmltok_impl.c +++ /dev/null @@ -1,1800 +0,0 @@ -/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd - See the file COPYING for copying permission. -*/ - -/* This file is included! */ -#ifdef XML_TOK_IMPL_C - -#ifndef IS_INVALID_CHAR -#define IS_INVALID_CHAR(enc, ptr, n) ((int)(enc) & 0) -#endif - -#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_INVALID_CHAR(enc, ptr, n)) { \ - *(nextTokPtr) = (ptr); \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; - -#define INVALID_CASES(ptr, nextTokPtr) \ - INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ - INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ - case BT_NONXML: \ - case BT_MALFORM: \ - case BT_TRAIL: \ - *(nextTokPtr) = (ptr); \ - return XML_TOK_INVALID; - -#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NAME_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; - -#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - case BT_DIGIT: \ - case BT_NAME: \ - case BT_MINUS: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) - -#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - ptr += n; \ - break; - -#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ - case BT_NONASCII: \ - if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; \ - } \ - case BT_NMSTRT: \ - case BT_HEX: \ - ptr += MINBPC(enc); \ - break; \ - CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ - CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) - -#ifndef PREFIX -#define PREFIX(ident) ident -#endif - -/* ptr points to character following "<!-" */ - -static int PTRCALL -PREFIX(scanComment)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - if (ptr != end) { - if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - ptr += MINBPC(enc); - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) - case BT_MINUS: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_COMMENT; - } - break; - default: - ptr += MINBPC(enc); - break; - } - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "<!" */ - -static int PTRCALL -PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_MINUS: - return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LSQB: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_COND_SECT_OPEN; - case BT_NMSTRT: - case BT_HEX: - ptr += MINBPC(enc); - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_PERCNT: - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; - /* don't allow <!ENTITY% foo "whatever"> */ - switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { - case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - /* fall through */ - case BT_S: case BT_CR: case BT_LF: - *nextTokPtr = ptr; - return XML_TOK_DECL_OPEN; - case BT_NMSTRT: - case BT_HEX: - ptr += MINBPC(enc); - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, - const char *end, int *tokPtr) -{ - int upper = 0; - - (void)enc; - - *tokPtr = XML_TOK_PI; - if (end - ptr != MINBPC(enc)*3) - return 1; - switch (BYTE_TO_ASCII(enc, ptr)) { - case ASCII_x: - break; - case ASCII_X: - upper = 1; - break; - default: - return 1; - } - ptr += MINBPC(enc); - switch (BYTE_TO_ASCII(enc, ptr)) { - case ASCII_m: - break; - case ASCII_M: - upper = 1; - break; - default: - return 1; - } - ptr += MINBPC(enc); - switch (BYTE_TO_ASCII(enc, ptr)) { - case ASCII_l: - break; - case ASCII_L: - upper = 1; - break; - default: - return 1; - } - if (upper) - return 0; - *tokPtr = XML_TOK_XML_DECL; - return 1; -} - -/* ptr points to character following "<?" */ - -static int PTRCALL -PREFIX(scanPi)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - int tok; - const char *target = ptr; - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: - if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - ptr += MINBPC(enc); - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) - case BT_QUEST: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr + MINBPC(enc); - return tok; - } - break; - default: - ptr += MINBPC(enc); - break; - } - } - return XML_TOK_PARTIAL; - case BT_QUEST: - if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr + MINBPC(enc); - return tok; - } - /* fall through */ - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -static int PTRCALL -PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, - ASCII_T, ASCII_A, ASCII_LSQB }; - int i; - - (void)enc; - - /* CDATA[ */ - if (end - ptr < 6 * MINBPC(enc)) - return XML_TOK_PARTIAL; - for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { - if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - *nextTokPtr = ptr; - return XML_TOK_CDATA_SECT_OPEN; -} - -static int PTRCALL -PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_NONE; - if (MINBPC(enc) > 1) { - size_t n = end - ptr; - if (n & (MINBPC(enc) - 1)) { - n &= ~(MINBPC(enc) - 1); - if (n == 0) - return XML_TOK_PARTIAL; - end = ptr + n; - } - } - switch (BYTE_TYPE(enc, ptr)) { - case BT_RSQB: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) - break; - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - ptr -= MINBPC(enc); - break; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CDATA_SECT_CLOSE; - case BT_CR: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; - case BT_LF: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; - INVALID_CASES(ptr, nextTokPtr) - default: - ptr += MINBPC(enc); - break; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_NONXML: - case BT_MALFORM: - case BT_TRAIL: - case BT_CR: - case BT_LF: - case BT_RSQB: - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - default: - ptr += MINBPC(enc); - break; - } - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; -} - -/* ptr points to character following "</" */ - -static int PTRCALL -PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - break; - case BT_GT: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_END_TAG; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -#ifdef XML_NS - case BT_COLON: - /* no need to check qname syntax here, - since end-tag must match exactly */ - ptr += MINBPC(enc); - break; -#endif - case BT_GT: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_END_TAG; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "&#X" */ - -static int PTRCALL -PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - if (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_DIGIT: - case BT_HEX: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_DIGIT: - case BT_HEX: - break; - case BT_SEMI: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CHAR_REF; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "&#" */ - -static int PTRCALL -PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - if (ptr != end) { - if (CHAR_MATCHES(enc, ptr, ASCII_x)) - return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); - switch (BYTE_TYPE(enc, ptr)) { - case BT_DIGIT: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_DIGIT: - break; - case BT_SEMI: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CHAR_REF; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "&" */ - -static int PTRCALL -PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_NUM: - return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_SEMI: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_ENTITY_REF; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following first character of attribute name */ - -static int PTRCALL -PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ -#ifdef XML_NS - int hadColon = 0; -#endif - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) -#ifdef XML_NS - case BT_COLON: - if (hadColon) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - hadColon = 1; - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - break; -#endif - case BT_S: case BT_CR: case BT_LF: - for (;;) { - int t; - - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - t = BYTE_TYPE(enc, ptr); - if (t == BT_EQUALS) - break; - switch (t) { - case BT_S: - case BT_LF: - case BT_CR: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - /* fall through */ - case BT_EQUALS: - { - int open; -#ifdef XML_NS - hadColon = 0; -#endif - for (;;) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - open = BYTE_TYPE(enc, ptr); - if (open == BT_QUOT || open == BT_APOS) - break; - switch (open) { - case BT_S: - case BT_LF: - case BT_CR: - break; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - ptr += MINBPC(enc); - /* in attribute value */ - for (;;) { - int t; - if (ptr == end) - return XML_TOK_PARTIAL; - t = BYTE_TYPE(enc, ptr); - if (t == open) - break; - switch (t) { - INVALID_CASES(ptr, nextTokPtr) - case BT_AMP: - { - int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); - if (tok <= 0) { - if (tok == XML_TOK_INVALID) - *nextTokPtr = ptr; - return tok; - } - break; - } - case BT_LT: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - default: - ptr += MINBPC(enc); - break; - } - } - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_S: - case BT_CR: - case BT_LF: - break; - case BT_SOL: - goto sol; - case BT_GT: - goto gt; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - /* ptr points to closing quote */ - for (;;) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_CR: case BT_LF: - continue; - case BT_GT: - gt: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_START_TAG_WITH_ATTS; - case BT_SOL: - sol: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - break; - } - break; - } - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -/* ptr points to character following "<" */ - -static int PTRCALL -PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ -#ifdef XML_NS - int hadColon; -#endif - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_EXCL: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_MINUS: - return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LSQB: - return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), - end, nextTokPtr); - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; - case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_SOL: - return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } -#ifdef XML_NS - hadColon = 0; -#endif - /* we have a start-tag */ - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) -#ifdef XML_NS - case BT_COLON: - if (hadColon) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - hadColon = 1; - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - break; -#endif - case BT_S: case BT_CR: case BT_LF: - { - ptr += MINBPC(enc); - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: - goto gt; - case BT_SOL: - goto sol; - case BT_S: case BT_CR: case BT_LF: - ptr += MINBPC(enc); - continue; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); - } - return XML_TOK_PARTIAL; - } - case BT_GT: - gt: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_START_TAG_NO_ATTS; - case BT_SOL: - sol: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_EMPTY_ELEMENT_NO_ATTS; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -static int PTRCALL -PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_NONE; - if (MINBPC(enc) > 1) { - size_t n = end - ptr; - if (n & (MINBPC(enc) - 1)) { - n &= ~(MINBPC(enc) - 1); - if (n == 0) - return XML_TOK_PARTIAL; - end = ptr + n; - } - } - switch (BYTE_TYPE(enc, ptr)) { - case BT_LT: - return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_AMP: - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_CR: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; - case BT_LF: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; - case BT_RSQB: - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) - break; - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_RSQB; - if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { - ptr -= MINBPC(enc); - break; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; - INVALID_CASES(ptr, nextTokPtr) - default: - ptr += MINBPC(enc); - break; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ - *nextTokPtr = ptr; \ - return XML_TOK_DATA_CHARS; \ - } \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_RSQB: - if (ptr + MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { - ptr += MINBPC(enc); - break; - } - if (ptr + 2*MINBPC(enc) != end) { - if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { - ptr += MINBPC(enc); - break; - } - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_INVALID; - } - } - /* fall through */ - case BT_AMP: - case BT_LT: - case BT_NONXML: - case BT_MALFORM: - case BT_TRAIL: - case BT_CR: - case BT_LF: - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - default: - ptr += MINBPC(enc); - break; - } - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; -} - -/* ptr points to character following "%" */ - -static int PTRCALL -PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return -XML_TOK_PERCENT; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: - *nextTokPtr = ptr; - return XML_TOK_PERCENT; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_SEMI: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_PARAM_ENTITY_REF; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return XML_TOK_PARTIAL; -} - -static int PTRCALL -PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_CR: case BT_LF: case BT_S: - case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: - *nextTokPtr = ptr; - return XML_TOK_POUND_NAME; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return -XML_TOK_POUND_NAME; -} - -static int PTRCALL -PREFIX(scanLit)(int open, const ENCODING *enc, - const char *ptr, const char *end, - const char **nextTokPtr) -{ - while (ptr != end) { - int t = BYTE_TYPE(enc, ptr); - switch (t) { - INVALID_CASES(ptr, nextTokPtr) - case BT_QUOT: - case BT_APOS: - ptr += MINBPC(enc); - if (t != open) - break; - if (ptr == end) - return -XML_TOK_LITERAL; - *nextTokPtr = ptr; - switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_CR: case BT_LF: - case BT_GT: case BT_PERCNT: case BT_LSQB: - return XML_TOK_LITERAL; - default: - return XML_TOK_INVALID; - } - default: - ptr += MINBPC(enc); - break; - } - } - return XML_TOK_PARTIAL; -} - -static int PTRCALL -PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, - const char **nextTokPtr) -{ - int tok; - if (ptr == end) - return XML_TOK_NONE; - if (MINBPC(enc) > 1) { - size_t n = end - ptr; - if (n & (MINBPC(enc) - 1)) { - n &= ~(MINBPC(enc) - 1); - if (n == 0) - return XML_TOK_PARTIAL; - end = ptr + n; - } - } - switch (BYTE_TYPE(enc, ptr)) { - case BT_QUOT: - return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_APOS: - return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_LT: - { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; - switch (BYTE_TYPE(enc, ptr)) { - case BT_EXCL: - return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_QUEST: - return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_NMSTRT: - case BT_HEX: - case BT_NONASCII: - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - *nextTokPtr = ptr - MINBPC(enc); - return XML_TOK_INSTANCE_START; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - case BT_CR: - if (ptr + MINBPC(enc) == end) { - *nextTokPtr = end; - /* indicate that this might be part of a CR/LF pair */ - return -XML_TOK_PROLOG_S; - } - /* fall through */ - case BT_S: case BT_LF: - for (;;) { - ptr += MINBPC(enc); - if (ptr == end) - break; - switch (BYTE_TYPE(enc, ptr)) { - case BT_S: case BT_LF: - break; - case BT_CR: - /* don't split CR/LF pair */ - if (ptr + MINBPC(enc) != end) - break; - /* fall through */ - default: - *nextTokPtr = ptr; - return XML_TOK_PROLOG_S; - } - } - *nextTokPtr = ptr; - return XML_TOK_PROLOG_S; - case BT_PERCNT: - return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); - case BT_COMMA: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_COMMA; - case BT_LSQB: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_OPEN_BRACKET; - case BT_RSQB: - ptr += MINBPC(enc); - if (ptr == end) - return -XML_TOK_CLOSE_BRACKET; - if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { - *nextTokPtr = ptr + 2*MINBPC(enc); - return XML_TOK_COND_SECT_CLOSE; - } - } - *nextTokPtr = ptr; - return XML_TOK_CLOSE_BRACKET; - case BT_LPAR: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_OPEN_PAREN; - case BT_RPAR: - ptr += MINBPC(enc); - if (ptr == end) - return -XML_TOK_CLOSE_PAREN; - switch (BYTE_TYPE(enc, ptr)) { - case BT_AST: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CLOSE_PAREN_ASTERISK; - case BT_QUEST: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CLOSE_PAREN_QUESTION; - case BT_PLUS: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_CLOSE_PAREN_PLUS; - case BT_CR: case BT_LF: case BT_S: - case BT_GT: case BT_COMMA: case BT_VERBAR: - case BT_RPAR: - *nextTokPtr = ptr; - return XML_TOK_CLOSE_PAREN; - } - *nextTokPtr = ptr; - return XML_TOK_INVALID; - case BT_VERBAR: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_OR; - case BT_GT: - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DECL_CLOSE; - case BT_NUM: - return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (end - ptr < n) \ - return XML_TOK_PARTIAL_CHAR; \ - if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NAME; \ - break; \ - } \ - if (IS_NAME_CHAR(enc, ptr, n)) { \ - ptr += n; \ - tok = XML_TOK_NMTOKEN; \ - break; \ - } \ - *nextTokPtr = ptr; \ - return XML_TOK_INVALID; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_NMSTRT: - case BT_HEX: - tok = XML_TOK_NAME; - ptr += MINBPC(enc); - break; - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: -#ifdef XML_NS - case BT_COLON: -#endif - tok = XML_TOK_NMTOKEN; - ptr += MINBPC(enc); - break; - case BT_NONASCII: - if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { - ptr += MINBPC(enc); - tok = XML_TOK_NAME; - break; - } - if (IS_NAME_CHAR_MINBPC(enc, ptr)) { - ptr += MINBPC(enc); - tok = XML_TOK_NMTOKEN; - break; - } - /* fall through */ - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - case BT_GT: case BT_RPAR: case BT_COMMA: - case BT_VERBAR: case BT_LSQB: case BT_PERCNT: - case BT_S: case BT_CR: case BT_LF: - *nextTokPtr = ptr; - return tok; -#ifdef XML_NS - case BT_COLON: - ptr += MINBPC(enc); - switch (tok) { - case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; - tok = XML_TOK_PREFIXED_NAME; - switch (BYTE_TYPE(enc, ptr)) { - CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) - default: - tok = XML_TOK_NMTOKEN; - break; - } - break; - case XML_TOK_PREFIXED_NAME: - tok = XML_TOK_NMTOKEN; - break; - } - break; -#endif - case BT_PLUS: - if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_NAME_PLUS; - case BT_AST: - if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_NAME_ASTERISK; - case BT_QUEST: - if (tok == XML_TOK_NMTOKEN) { - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_NAME_QUESTION; - default: - *nextTokPtr = ptr; - return XML_TOK_INVALID; - } - } - return -tok; -} - -static int PTRCALL -PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - const char *start; - if (ptr == end) - return XML_TOK_NONE; - start = ptr; - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_AMP: - if (ptr == start) - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - case BT_LT: - /* this is for inside entity references */ - *nextTokPtr = ptr; - return XML_TOK_INVALID; - case BT_LF: - if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - case BT_CR: - if (ptr == start) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - case BT_S: - if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_ATTRIBUTE_VALUE_S; - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - default: - ptr += MINBPC(enc); - break; - } - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; -} - -static int PTRCALL -PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - const char *start; - if (ptr == end) - return XML_TOK_NONE; - start = ptr; - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_AMP: - if (ptr == start) - return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - case BT_PERCNT: - if (ptr == start) { - int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), - end, nextTokPtr); - return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - case BT_LF: - if (ptr == start) { - *nextTokPtr = ptr + MINBPC(enc); - return XML_TOK_DATA_NEWLINE; - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - case BT_CR: - if (ptr == start) { - ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_TRAILING_CR; - if (BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - *nextTokPtr = ptr; - return XML_TOK_DATA_NEWLINE; - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; - default: - ptr += MINBPC(enc); - break; - } - } - *nextTokPtr = ptr; - return XML_TOK_DATA_CHARS; -} - -#ifdef XML_DTD - -static int PTRCALL -PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, - const char *end, const char **nextTokPtr) -{ - int level = 0; - if (MINBPC(enc) > 1) { - size_t n = end - ptr; - if (n & (MINBPC(enc) - 1)) { - n &= ~(MINBPC(enc) - 1); - end = ptr + n; - } - } - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { - INVALID_CASES(ptr, nextTokPtr) - case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { - ++level; - ptr += MINBPC(enc); - } - } - break; - case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; - if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { - ptr += MINBPC(enc); - if (level == 0) { - *nextTokPtr = ptr; - return XML_TOK_IGNORE_SECT; - } - --level; - } - } - break; - default: - ptr += MINBPC(enc); - break; - } - } - return XML_TOK_PARTIAL; -} - -#endif /* XML_DTD */ - -static int PTRCALL -PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, - const char **badPtr) -{ - ptr += MINBPC(enc); - end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_DIGIT: - case BT_HEX: - case BT_MINUS: - case BT_APOS: - case BT_LPAR: - case BT_RPAR: - case BT_PLUS: - case BT_COMMA: - case BT_SOL: - case BT_EQUALS: - case BT_QUEST: - case BT_CR: - case BT_LF: - case BT_SEMI: - case BT_EXCL: - case BT_AST: - case BT_PERCNT: - case BT_NUM: -#ifdef XML_NS - case BT_COLON: -#endif - break; - case BT_S: - if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { - *badPtr = ptr; - return 0; - } - break; - case BT_NAME: - case BT_NMSTRT: - if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) - break; - default: - switch (BYTE_TO_ASCII(enc, ptr)) { - case 0x24: /* $ */ - case 0x40: /* @ */ - break; - default: - *badPtr = ptr; - return 0; - } - break; - } - } - return 1; -} - -/* This must only be called for a well-formed start-tag or empty - element tag. Returns the number of attributes. Pointers to the - first attsMax attributes are stored in atts. -*/ - -static int PTRCALL -PREFIX(getAtts)(const ENCODING *enc, const char *ptr, - int attsMax, ATTRIBUTE *atts) -{ - enum { other, inName, inValue } state = inName; - int nAtts = 0; - int open = 0; /* defined when state == inValue; - initialization just to shut up compilers */ - - for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { - switch (BYTE_TYPE(enc, ptr)) { -#define START_NAME \ - if (state == other) { \ - if (nAtts < attsMax) { \ - atts[nAtts].name = ptr; \ - atts[nAtts].normalized = 1; \ - } \ - state = inName; \ - } -#define LEAD_CASE(n) \ - case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_NONASCII: - case BT_NMSTRT: - case BT_HEX: - START_NAME - break; -#undef START_NAME - case BT_QUOT: - if (state != inValue) { - if (nAtts < attsMax) - atts[nAtts].valuePtr = ptr + MINBPC(enc); - state = inValue; - open = BT_QUOT; - } - else if (open == BT_QUOT) { - state = other; - if (nAtts < attsMax) - atts[nAtts].valueEnd = ptr; - nAtts++; - } - break; - case BT_APOS: - if (state != inValue) { - if (nAtts < attsMax) - atts[nAtts].valuePtr = ptr + MINBPC(enc); - state = inValue; - open = BT_APOS; - } - else if (open == BT_APOS) { - state = other; - if (nAtts < attsMax) - atts[nAtts].valueEnd = ptr; - nAtts++; - } - break; - case BT_AMP: - if (nAtts < attsMax) - atts[nAtts].normalized = 0; - break; - case BT_S: - if (state == inName) - state = other; - else if (state == inValue - && nAtts < attsMax - && atts[nAtts].normalized - && (ptr == atts[nAtts].valuePtr - || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE - || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE - || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) - atts[nAtts].normalized = 0; - break; - case BT_CR: case BT_LF: - /* This case ensures that the first attribute name is counted - Apart from that we could just change state on the quote. */ - if (state == inName) - state = other; - else if (state == inValue && nAtts < attsMax) - atts[nAtts].normalized = 0; - break; - case BT_GT: - case BT_SOL: - if (state != inValue) - return nAtts; - break; - default: - break; - } - } - /* not reached */ -} - -static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) -{ - int result = 0; - - (void)enc; - - /* skip &# */ - ptr += 2*MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_x)) { - for (ptr += MINBPC(enc); - !CHAR_MATCHES(enc, ptr, ASCII_SEMI); - ptr += MINBPC(enc)) { - int c = BYTE_TO_ASCII(enc, ptr); - switch (c) { - case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: - case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: - result <<= 4; - result |= (c - ASCII_0); - break; - case ASCII_A: case ASCII_B: case ASCII_C: - case ASCII_D: case ASCII_E: case ASCII_F: - result <<= 4; - result += 10 + (c - ASCII_A); - break; - case ASCII_a: case ASCII_b: case ASCII_c: - case ASCII_d: case ASCII_e: case ASCII_f: - result <<= 4; - result += 10 + (c - ASCII_a); - break; - } - if (result >= 0x110000) - return -1; - } - } - else { - for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { - int c = BYTE_TO_ASCII(enc, ptr); - result *= 10; - result += (c - ASCII_0); - if (result >= 0x110000) - return -1; - } - } - return checkCharRefNumber(result); -} - -static int PTRCALL -PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, - const char *end) -{ - (void)enc; - - switch ((end - ptr)/MINBPC(enc)) { - case 2: - if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { - switch (BYTE_TO_ASCII(enc, ptr)) { - case ASCII_l: - return ASCII_LT; - case ASCII_g: - return ASCII_GT; - } - } - break; - case 3: - if (CHAR_MATCHES(enc, ptr, ASCII_a)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_m)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_p)) - return ASCII_AMP; - } - } - break; - case 4: - switch (BYTE_TO_ASCII(enc, ptr)) { - case ASCII_q: - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_u)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_o)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_t)) - return ASCII_QUOT; - } - } - break; - case ASCII_a: - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_p)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_o)) { - ptr += MINBPC(enc); - if (CHAR_MATCHES(enc, ptr, ASCII_s)) - return ASCII_APOS; - } - } - break; - } - } - return 0; -} - -static int PTRCALL -PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr1)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - if (*ptr1++ != *ptr2++) \ - return 0; - LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) -#undef LEAD_CASE - /* fall through */ - if (*ptr1++ != *ptr2++) - return 0; - break; - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 1) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 2) { - if (*ptr2++ != *ptr1++) - return 0; - if (MINBPC(enc) > 3) { - if (*ptr2++ != *ptr1++) - return 0; - } - } - } - break; - default: - if (MINBPC(enc) == 1 && *ptr1 == *ptr2) - return 1; - switch (BYTE_TYPE(enc, ptr2)) { - case BT_LEAD2: - case BT_LEAD3: - case BT_LEAD4: - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - return 0; - default: - return 1; - } - } - } - /* not reached */ -} - -static int PTRCALL -PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, - const char *end1, const char *ptr2) -{ - (void)enc; - - for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) - return 0; - if (!CHAR_MATCHES(enc, ptr1, *ptr2)) - return 0; - } - return ptr1 == end1; -} - -static int PTRFASTCALL -PREFIX(nameLength)(const ENCODING *enc, const char *ptr) -{ - const char *start = ptr; - for (;;) { - switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: ptr += n; break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_NONASCII: - case BT_NMSTRT: -#ifdef XML_NS - case BT_COLON: -#endif - case BT_HEX: - case BT_DIGIT: - case BT_NAME: - case BT_MINUS: - ptr += MINBPC(enc); - break; - default: - return (int)(ptr - start); - } - } -} - -static const char * PTRFASTCALL -PREFIX(skipS)(const ENCODING *enc, const char *ptr) -{ - for (;;) { - switch (BYTE_TYPE(enc, ptr)) { - case BT_LF: - case BT_CR: - case BT_S: - ptr += MINBPC(enc); - break; - default: - return ptr; - } - } -} - -static void PTRCALL -PREFIX(updatePosition)(const ENCODING *enc, - const char *ptr, - const char *end, - POSITION *pos) -{ - while (ptr != end) { - switch (BYTE_TYPE(enc, ptr)) { -#define LEAD_CASE(n) \ - case BT_LEAD ## n: \ - ptr += n; \ - break; - LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) -#undef LEAD_CASE - case BT_LF: - pos->columnNumber = (XML_Size)-1; - pos->lineNumber++; - ptr += MINBPC(enc); - break; - case BT_CR: - pos->lineNumber++; - ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) - ptr += MINBPC(enc); - pos->columnNumber = (XML_Size)-1; - break; - default: - ptr += MINBPC(enc); - break; - } - pos->columnNumber++; - } -} - -#undef DO_LEAD_CASE -#undef MULTIBYTE_CASES -#undef INVALID_CASES -#undef CHECK_NAME_CASE -#undef CHECK_NAME_CASES -#undef CHECK_NMSTRT_CASE -#undef CHECK_NMSTRT_CASES - -#else /* XML_TOK_IMPL_C */ - -int xml_tok_impl_c; - -#endif /* XML_TOK_IMPL_C */ |