summary | refs | log | tree | commit | diff | stats
path: root/node_modules/xss/lib/parser.js
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/xss/lib/parser.js')
-rw-r--r-- node_modules/xss/lib/parser.js 239
1 files changed, 239 insertions, 0 deletions
diff --git a/node_modules/xss/lib/parser.js b/node_modules/xss/lib/parser.js
new file mode 100644
index 0000000..7c15def
--- /dev/null
+++ b/node_modules/xss/lib/parser.js
@@ -0,0 +1,239 @@
+/**
+ * Simple HTML Parser
+ *
+ * @author Zongmin Lei<leizongmin@gmail.com>
+ */
+
+var _ = require("./util");
+
/**
 * Extract the lower-cased tag name from the source text of a single tag.
 *
 * The leading "<" is always dropped. When `_.spaceIndex` reports no match
 * (-1) the trailing ">" is dropped as well; otherwise the text is cut right
 * after the reported index. A leading "/" (closing tag) and a trailing "/"
 * (self-closing tag) are removed from the result.
 *
 * NOTE(review): `_.spaceIndex` and `_.trim` live in ./util, which is not
 * shown here; presumably they return the index of the first whitespace
 * character and strip surrounding whitespace - confirm against util.js.
 *
 * @param {String} html source of one tag, e.g. '<a href="#">'
 * @return {String} tag name, e.g. 'a'
 */
function getTagName(html) {
  var i = _.spaceIndex(html);
  // Single declaration instead of one `var` per branch.
  var tagName = i === -1 ? html.slice(1, -1) : html.slice(1, i + 1);
  tagName = _.trim(tagName).toLowerCase();
  if (tagName.slice(0, 1) === "/") tagName = tagName.slice(1);
  if (tagName.slice(-1) === "/") tagName = tagName.slice(0, -1);
  return tagName;
}
+
/**
 * Tell whether the source text of a tag is a closing tag.
 *
 * @param {String} html source of one tag, e.g. '</a>'
 * @return {Boolean} true when the text begins with "</"
 */
function isClosing(html) {
  var firstTwoChars = html.charAt(0) + html.charAt(1);
  return firstTwoChars === "</";
}
+
/**
 * Scan an HTML string, replace every tag with the result of `onTag`, and
 * escape everything that is not part of a tag.
 *
 * A tag is the text from a "<" up to the next ">" that is not inside a quoted
 * attribute value. A quote character only opens a quoted value when the
 * character right before it is "=". Text outside tags, and whatever is left
 * over when the input ends (for example after an unclosed "<"), is passed
 * through `escapeHtml`.
 *
 * @param {String} html
 * @param {Function} onTag called for each tag as
 *   `onTag(sourcePosition, position, tag, html, isClosing)` where
 *   `sourcePosition` is the index of the tag's "<" in the input, `position`
 *   is the length of the output built so far, `tag` is the name returned by
 *   `getTagName`, `html` is the tag's source text and `isClosing` tells
 *   whether it starts with "</". Its return value is appended to the output.
 * @param {Function} escapeHtml called with each run of non-tag text; its
 *   return value is appended to the output
 * @return {String} the processed html
 */
function parseTag(html, onTag, escapeHtml) {
  "use strict";

  var rethtml = "";
  var lastPos = 0; // start of the input that has not been copied to rethtml yet
  var tagStart = false; // index of the pending "<", or false when outside a tag
  var quoteStart = false; // the quote character that is open, or false
  var currentPos = 0;
  var len = html.length;
  var currentTagName = "";
  var currentHtml = "";

  for (currentPos = 0; currentPos < len; currentPos++) {
    var c = html.charAt(currentPos);
    if (tagStart === false) {
      if (c === "<") {
        tagStart = currentPos;
        continue;
      }
    } else {
      if (quoteStart === false) {
        if (c === "<") {
          // A second "<" before any ">": the pending one was plain text.
          rethtml += escapeHtml(html.slice(lastPos, currentPos));
          tagStart = currentPos;
          lastPos = currentPos;
          continue;
        }
        if (c === ">") {
          // Flush the text in front of the tag, then emit the tag itself.
          rethtml += escapeHtml(html.slice(lastPos, tagStart));
          currentHtml = html.slice(tagStart, currentPos + 1);
          currentTagName = getTagName(currentHtml);
          rethtml += onTag(
            tagStart,
            rethtml.length,
            currentTagName,
            currentHtml,
            isClosing(currentHtml)
          );
          lastPos = currentPos + 1;
          tagStart = false;
          continue;
        }
        if ((c === '"' || c === "'") && html.charAt(currentPos - 1) === "=") {
          quoteStart = c;
          continue;
        }
      } else {
        // Inside a quoted value only the matching quote is significant.
        if (c === quoteStart) {
          quoteStart = false;
          continue;
        }
      }
    }
  }
  if (lastPos < html.length) {
    rethtml += escapeHtml(html.slice(lastPos));
  }

  return rethtml;
}
+
// Matches every character that is not allowed in an attribute name.
var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9_:\.\-]/gim;

/**
 * Split the attribute section of a tag into name/value pairs, hand each pair
 * to `onAttr`, and join the results.
 *
 * Supported forms are `name="value"`, `name='value'`, `name=value` and bare
 * `name`; spaces are tolerated around "=". Before `onAttr` is called the name
 * is trimmed, stripped of characters matching REGEXP_ILLEGAL_ATTR_NAME and
 * lower-cased; attributes whose name ends up empty are dropped.
 *
 * @param {String} html e.g. `href="#" target="_blank"`
 * @param {Function} onAttr called as `onAttr(name, value)`; `value` is ""
 *   when the attribute has none. A truthy return value is kept.
 * @return {String} the kept return values joined with a single space
 */
function parseAttr(html, onAttr) {
  "use strict";

  var lastPos = 0; // start of the name or value currently being read
  var retAttrs = [];
  var tmpName = false; // name waiting for its value, or false
  var len = html.length;
  var isNormalized = false; // true once whitespace has been turned into spaces

  function addAttr(name, value) {
    name = _.trim(name);
    name = name.replace(REGEXP_ILLEGAL_ATTR_NAME, "").toLowerCase();
    if (name.length < 1) return;
    var ret = onAttr(name, value || "");
    if (ret) retAttrs.push(ret);
  }

  // Analyze the characters one by one
  for (var i = 0; i < len; i++) {
    var c = html.charAt(i);
    var v, j;
    if (tmpName === false && c === "=") {
      tmpName = html.slice(lastPos, i);
      lastPos = i + 1;
      continue;
    }
    if (tmpName !== false) {
      if (
        i === lastPos &&
        (c === '"' || c === "'") &&
        html.charAt(i - 1) === "="
      ) {
        // Quoted value directly after "=": jump to the matching quote.
        j = html.indexOf(c, i + 1);
        if (j === -1) {
          break;
        } else {
          v = _.trim(html.slice(lastPos + 1, j));
          addAttr(tmpName, v);
          tmpName = false;
          i = j;
          lastPos = i + 1;
          continue;
        }
      }
    }
    if (/\s/.test(c)) {
      // Turn every whitespace character into a plain space so the helpers
      // only have to deal with " ". This is done once, at the first
      // whitespace character met outside a quoted value; repeating it would
      // change nothing because the replacement is itself a space.
      if (!isNormalized) {
        html = html.replace(/\s/g, " ");
        isNormalized = true;
      }
      if (tmpName === false) {
        j = findNextEqual(html, i);
        if (j === -1) {
          // No "=" follows: what was read so far is a value-less attribute.
          v = _.trim(html.slice(lastPos, i));
          addAttr(v);
          tmpName = false;
          lastPos = i + 1;
          continue;
        } else {
          i = j - 1;
          continue;
        }
      } else {
        j = findBeforeEqual(html, i - 1);
        if (j === -1) {
          // The whitespace ends an unquoted (or loosely quoted) value.
          v = _.trim(html.slice(lastPos, i));
          v = stripQuoteWrap(v);
          addAttr(tmpName, v);
          tmpName = false;
          lastPos = i + 1;
          continue;
        } else {
          // Still between "=" and the value: keep skipping.
          continue;
        }
      }
    }
  }

  // Whatever is left after the loop is the last name or value.
  if (lastPos < html.length) {
    if (tmpName === false) {
      addAttr(html.slice(lastPos));
    } else {
      addAttr(tmpName, stripQuoteWrap(_.trim(html.slice(lastPos))));
    }
  }

  return _.trim(retAttrs.join(" "));
}
+
/**
 * Look for an "=" at or after index `i`, skipping space characters only.
 *
 * @param {String} str
 * @param {Number} i index to start scanning from
 * @return {Number} index of the "=" when it is the first non-space character
 *   at or after `i`; -1 otherwise, including when only spaces remain
 */
function findNextEqual(str, i) {
  for (; i < str.length; i++) {
    var c = str[i];
    if (c === " ") continue;
    return c === "=" ? i : -1;
  }
  // Reached the end of the string without meeting a non-space character.
  return -1;
}
+
/**
 * Look for an "=" at or before index `i`, scanning backwards and skipping
 * space characters only.
 *
 * @param {String} str
 * @param {Number} i index to start scanning from (towards index 0)
 * @return {Number} index of the "=" when it is the first non-space character
 *   at or before `i`; -1 otherwise, including when only spaces are found
 */
function findBeforeEqual(str, i) {
  // `i >= 0` so that the first character of the string is inspected too.
  for (; i >= 0; i--) {
    var c = str[i];
    if (c === " ") continue;
    return c === "=" ? i : -1;
  }
  // Reached the start of the string without meeting a non-space character.
  return -1;
}
+
/**
 * Check whether a string starts and ends with the same quote character,
 * either `"` or `'`.
 *
 * The first and last characters are compared without a length check, so a
 * string consisting of a single quote character also counts as wrapped.
 *
 * @param {String} text
 * @return {Boolean}
 */
function isQuoteWrapString(text) {
  var first = text[0];
  var last = text[text.length - 1];
  var isDoubleQuoted = first === '"' && last === '"';
  var isSingleQuoted = first === "'" && last === "'";
  return isDoubleQuoted || isSingleQuoted;
}
+
/**
 * Remove one pair of surrounding quotes from a string.
 *
 * @param {String} text
 * @return {String} `text` without its first and last character when
 *   `isQuoteWrapString(text)` is true, otherwise `text` unchanged
 */
function stripQuoteWrap(text) {
  return isQuoteWrapString(text) ? text.slice(1, -1) : text;
}
+
// Public API: only the two parsers are exported; the helper functions in
// this file are not.
exports.parseTag = parseTag;
exports.parseAttr = parseAttr;