/**
 * HTML-to-node-tree parser for mini-program rich text rendering.
 *
 * The parser is a hand-written state machine: `parse()` feeds the input one
 * character at a time to the current state handler (`Text`, `TagName`,
 * `AttrName`, `AttrValue`, `EndTag`), which builds a tree of plain objects in
 * `this.DOM`.
 *
 * Lines of the form `// #ifdef …` / `// #endif` are conditional-compilation
 * directives consumed by the build tool; they must stay on their own lines.
 */
const cfg = require("./config.js");
const blankChar = cfg.blankChar;
const CssHandler = require("./CssHandler.js");
const { screenWidth, system } = wx.getSystemInfoSync();

// #ifdef MP-BAIDU || MP-ALIPAY || MP-TOUTIAO
var entities = {
  lt: "<",
  gt: ">",
  amp: "&",
  quot: '"',
  apos: "'",
  nbsp: "\xA0",
  ensp: "\u2002",
  emsp: "\u2003",
  ndash: "–",
  mdash: "—",
  middot: "·",
  lsquo: "‘",
  rsquo: "’",
  ldquo: "“",
  rdquo: "”",
  bull: "•",
  hellip: "…",
  permil: "‰",
  copy: "©",
  reg: "®",
  trade: "™",
  times: "×",
  divide: "÷",
  cent: "¢",
  pound: "£",
  yen: "¥",
  euro: "€",
  sect: "§",
};
// #endif

// Optional emoji patch package: https://jin-yufeng.github.io/Parser/#/instructions?id=emoji
// Left undefined unless the patch is installed; parse() checks it before use.
let emoji;

class MpHtmlParser {
  /**
   * @param {string} data - HTML source to parse.
   * @param {object} [options]
   * @param {number} [options.compress] - Bit flags: 1 drops `id`, 2 drops `class`.
   * @param {object} [options.tagStyle] - Passed through to CssHandler.
   * @param {string} [options.domain] - Base used to complete relative URLs.
   * @param {boolean} [options.useAnchor] - Keep ids reachable for anchors.
   * @param {boolean} [options.xml] - Keep attribute/end-tag case and honour `/>`.
   */
  constructor(data, options = {}) {
    this.attrs = {};
    this.compress = options.compress;
    this.CssHandler = new CssHandler(options.tagStyle, screenWidth);
    this.data = data;
    this.domain = options.domain;
    this.DOM = [];
    this.i = this.start = this.audioNum = this.imgNum = this.videoNum = 0;
    this.protocol =
      this.domain && this.domain.includes("://")
        ? this.domain.split("://")[0]
        : "";
    this.state = this.Text;
    this.STACK = [];
    this.useAnchor = options.useAnchor;
    this.xml = options.xml;
  }

  /**
   * Runs the state machine over the whole input.
   * @returns {Array<object>} The top-level node list (`this.DOM`).
   */
  parse() {
    if (emoji) this.data = emoji.parseEmoji(this.data);
    for (let c; (c = this.data[this.i]); this.i++) this.state(c);
    // Flush trailing text, then close every element left open.
    if (this.state == this.Text) this.setText();
    while (this.STACK.length) this.popNode(this.STACK.pop());
    // #ifdef MP-BAIDU || MP-TOUTIAO
    // Copy the display/float declarations of rich-text-rendered nodes into
    // attrs.contain so the rich-text wrapper can pick them up.
    (function liftStyles(nodes) {
      for (let idx = nodes.length, n; (n = nodes[--idx]); ) {
        if (n.type == "text") continue;
        if (n.c) {
          // Nodes flagged `c` may still lack children; guard the recursion.
          liftStyles(n.children || []);
          continue;
        }
        const style = n.attrs.style;
        if (!style) continue;
        // Start from "" so a lone `float` no longer yields "undefined;float:…".
        let res = "";
        for (const prop of ["display", "float"]) {
          const from = style.indexOf(prop);
          if (from == -1) continue;
          const semi = style.indexOf(";", from);
          const decl = style.substring(from, semi == -1 ? style.length : semi);
          res = res ? res + ";" + decl : decl;
        }
        n.attrs.contain = res || void 0;
      }
    })(this.DOM);
    // #endif
    if (this.DOM.length) {
      this.DOM[0].PoweredBy = "Parser";
      if (this.title) this.DOM[0].title = this.title;
    }
    return this.DOM;
  }

  /**
   * Commits the attribute collected in `attrName` / `attrVal` to `this.attrs`
   * (only if trusted by the config) and moves on to the next attribute or to
   * node creation.
   */
  setAttr() {
    const name = this.getName(this.attrName);
    if (cfg.trustAttrs[name]) {
      if (!this.attrVal) {
        // Value-less attributes are kept only when they are boolean attributes.
        if (cfg.boolAttrs[name]) this.attrs[name] = "T";
      } else if (name == "src") {
        // Un-escape `&amp;` in URLs before completing them.
        this.attrs[name] = this.getUrl(this.attrVal.replace(/&amp;/g, "&"));
      } else this.attrs[name] = this.attrVal;
    }
    this.attrVal = "";
    while (blankChar[this.data[this.i]]) this.i++;
    if (this.isClose()) this.setNode();
    else {
      this.start = this.i;
      this.state = this.AttrName;
    }
  }

  /**
   * Turns the pending text section into text node(s): applies the optional
   * `cfg.onText` hook, collapses whitespace outside of `pre`, and decodes
   * character references.
   */
  setText() {
    let back;
    let text = this.section();
    if (!text) return;
    text = (cfg.onText && cfg.onText(text, () => (back = true))) || text;
    if (back) {
      // The hook asked for its output to be re-parsed as markup: splice it
      // into the input and replay the state machine over the new span.
      this.data =
        this.data.substr(0, this.start) + text + this.data.substr(this.i);
      const end = this.start + text.length;
      for (this.i = this.start; this.i < end; this.i++)
        this.state(this.data[this.i]);
      return;
    }
    if (!this.pre) {
      // Collapse every run of blank characters into a single space.
      // Walk backwards (as the original did) and reverse once at the end.
      const kept = [];
      for (let k = text.length - 1; k >= 0; k--) {
        let ch = text[k];
        if (blankChar[ch]) {
          if (blankChar[kept[kept.length - 1]]) continue;
          ch = " ";
        }
        kept.push(ch);
      }
      text = kept.reverse().join("");
      if (text == " ") return;
    }
    // Decode character references.
    const siblings = this.siblings();
    let i = -1;
    let j;
    let en;
    while (1) {
      if ((i = text.indexOf("&", i + 1)) == -1) break;
      if ((j = text.indexOf(";", i + 2)) == -1) break;
      if (text[i + 1] == "#") {
        // Numeric reference: `&#NNN;` (decimal) or `&#xHHH;` (hex).
        const digits = text.substring(i + 2, j);
        const isHex = digits[0] == "x" || digits[0] == "X";
        en = isHex ? parseInt(digits.substr(1), 16) : parseInt(digits, 10);
        // fromCodePoint handles code points above U+FFFF (e.g. emoji) but
        // throws outside the Unicode range, hence the bounds check.
        if (!isNaN(en) && en >= 0 && en <= 0x10ffff)
          text =
            text.substr(0, i) + String.fromCodePoint(en) + text.substring(j + 1);
      } else {
        en = text.substring(i + 1, j);
        // #ifdef MP-WEIXIN || MP-QQ || APP-PLUS
        if (en == "nbsp")
          text = text.substr(0, i) + "\xA0" + text.substr(j + 1); // works around &nbsp; not rendering
        else if (
          en != "lt" &&
          en != "gt" &&
          en != "amp" &&
          en != "ensp" &&
          en != "emsp" &&
          en != "quot" &&
          en != "apos"
        ) {
          // Emit any other named entity as its own text node flagged `en`,
          // preceded by the text before it.
          i &&
            siblings.push({
              type: "text",
              text: text.substr(0, i),
            });
          siblings.push({
            type: "text",
            text: `&${en};`,
            en: 1,
          });
          text = text.substr(j + 1);
          i = -1;
        }
        // #endif
        // #ifdef MP-BAIDU || MP-ALIPAY || MP-TOUTIAO
        if (entities[en])
          text = text.substr(0, i) + entities[en] + text.substr(j + 1);
        // #endif
      }
    }
    text &&
      siblings.push({
        type: "text",
        text,
      });
  }

  /**
   * Creates the element node for the tag that has just been read, attaches it
   * to the tree (or handles it specially when the tag is ignored), and
   * switches back to the Text state.
   */
  setNode() {
    const node = {
      name: this.tagName.toLowerCase(),
      attrs: this.attrs,
    };
    const close =
      cfg.selfClosingTags[node.name] ||
      (this.xml && this.data[this.i] == "/");
    this.attrs = {};
    if (!cfg.ignoreTags[node.name]) {
      this.matchAttr(node);
      if (!close) {
        node.children = [];
        if (node.name == "pre" && cfg.highlight) {
          // Let the highlighter rewrite the block content before parsing it.
          this.remove(node);
          this.pre = node.pre = true;
        }
        this.siblings().push(node);
        this.STACK.push(node);
      } else if (!cfg.filter || cfg.filter(node, this) != false)
        this.siblings().push(node);
    } else if (!close) {
      this.remove(node);
    } else if (node.name == "source") {
      const parent = this.STACK[this.STACK.length - 1];
      const attrs = node.attrs;
      if (parent && attrs.src) {
        if (parent.name == "video" || parent.name == "audio")
          parent.attrs.source.push(attrs.src);
        else {
          // True when the `media` attribute is absent or its px-based
          // min-width / max-width condition holds for this screen.
          const matchesMedia = (media) => {
            if (!media) return true;
            if (!media.includes("px")) return false;
            let at = media.indexOf("min-width");
            if (
              at != -1 &&
              (at = media.indexOf(":", at + 8)) != -1 &&
              screenWidth > parseInt(media.substr(at + 1), 10)
            )
              return true;
            at = media.indexOf("max-width");
            return (
              at != -1 &&
              (at = media.indexOf(":", at + 8)) != -1 &&
              screenWidth < parseInt(media.substr(at + 1), 10)
            );
          };
          // Skip webp sources on iOS. `includes` is required here: the former
          // `indexOf(...)` test was truthy (-1) whenever ".webp" was absent.
          const unsupported =
            attrs.src.includes(".webp") && system.includes("iOS");
          if (
            parent.name == "picture" &&
            !parent.attrs.src &&
            !unsupported &&
            matchesMedia(attrs.media)
          )
            parent.attrs.src = attrs.src;
        }
      }
    } else if (node.name == "base" && !this.domain)
      this.domain = node.attrs.href;
    if (this.data[this.i] == "/") this.i++;
    this.start = this.i + 1;
    this.state = this.Text;
  }

  /**
   * Skips the content of `node` up to and including its closing tag.
   *
   * Depending on the tag, the skipped content is highlighted in place
   * (`pre`), recorded (`title`, `style`) or converted to an inline image
   * (`svg`). On return `this.i` sits on the closing tag's `>` (or on the
   * opening tag's end again for `pre`, so the rewritten content is parsed).
   *
   * @param {object} node - The element whose content is to be skipped.
   */
  remove(node) {
    const name = node.name;
    const openEnd = this.i; // index of the opening tag's terminator
    while (1) {
      const closeStart = this.data.indexOf("</", this.i + 1);
      if (closeStart == -1) {
        // No closing tag: parse pre/svg content normally, drop anything else.
        this.i = name == "pre" || name == "svg" ? openEnd : this.data.length;
        return;
      }
      this.start = this.i = closeStart + 2;
      while (
        this.i < this.data.length &&
        !blankChar[this.data[this.i]] &&
        !this.isClose()
      )
        this.i++;
      if (this.getName(this.section()) != name) continue;

      const content = this.data.substring(openEnd + 1, closeStart);
      if (name == "pre" && cfg.highlight) {
        // NOTE(review): assumes cfg.highlight(content, attrs) returns the
        // replacement markup — confirm against config.js.
        this.data =
          this.data.substr(0, openEnd + 1) +
          cfg.highlight(content, node.attrs) +
          this.data.substr(closeStart);
        this.i = openEnd;
        return;
      }
      if (name == "style") {
        // NOTE(review): presumably CssHandler exposes getStyle(cssText) for
        // embedded style sheets — guarded because it is not visible here.
        if (typeof this.CssHandler.getStyle == "function")
          this.CssHandler.getStyle(content);
      } else if (name == "title") this.title = content;

      if ((this.i = this.data.indexOf(">", this.i)) == -1)
        this.i = this.data.length;
      // Convert an inline svg into an <img> carrying a data URI.
      if (name == "svg") {
        let src = this.data.substring(openEnd, this.i + 1);
        if (!node.attrs.xmlns)
          src = ' xmlns="http://www.w3.org/2000/svg"' + src;
        // Walk back to the `<` of the opening tag to recover its text.
        let tagStart = openEnd;
        while (tagStart > 0 && this.data[tagStart] != "<") tagStart--;
        src = this.data.substring(tagStart, openEnd) + src;
        const parent = this.STACK[this.STACK.length - 1];
        if (
          node.attrs.width == "100%" &&
          parent &&
          (parent.attrs.style || "").includes("inline")
        )
          parent.attrs.style =
            "width:300px;max-width:100%;" + parent.attrs.style;
        this.siblings().push({
          name: "img",
          attrs: {
            src: "data:image/svg+xml;utf8," + src.replace(/#/g, "%23"),
            ignore: "T",
          },
        });
      }
      return;
    }
  }

  /**
   * Normalizes the attributes of `node`: applies tag-specific handling,
   * merges presentational attributes into the inline style, completes URLs
   * and converts rpx units.
   *
   * @param {object} node - Element node; `node.attrs` is modified in place.
   */
  matchAttr(node) {
    const attrs = node.attrs;
    let style =
      this.CssHandler.match(node.name, attrs, node) + (attrs.style || "");
    const styleObj = {};
    if (attrs.id) {
      if (this.compress & 1) attrs.id = void 0;
      else if (this.useAnchor) this.bubble();
    }
    if (this.compress & 2 && attrs.class) attrs.class = void 0;
    switch (node.name) {
      case "img":
        if (attrs["data-src"]) {
          attrs.src = attrs.src || attrs["data-src"];
          attrs["data-src"] = void 0;
        }
        if (attrs.src && !attrs.ignore) {
          if (this.bubble()) attrs.i = (this.imgNum++).toString();
          else attrs.ignore = "T";
        }
        break;
      case "a":
      case "ad":
      // #ifdef APP-PLUS
      case "iframe":
      case "embed":
      // #endif
        this.bubble();
        break;
      case "font":
        if (attrs.color) {
          styleObj["color"] = attrs.color;
          attrs.color = void 0;
        }
        if (attrs.face) {
          styleObj["font-family"] = attrs.face;
          attrs.face = void 0;
        }
        if (attrs.size) {
          let size = parseInt(attrs.size, 10);
          // A non-numeric size used to store `undefined` in styleObj, which
          // made the style loop below throw; ignore such values instead.
          if (!isNaN(size)) {
            if (size < 1) size = 1;
            else if (size > 7) size = 7;
            const map = [
              "xx-small",
              "x-small",
              "small",
              "medium",
              "large",
              "x-large",
              "xx-large",
            ];
            styleObj["font-size"] = map[size - 1];
          }
          attrs.size = void 0;
        }
        break;
      case "video":
      case "audio":
        if (!attrs.id) attrs.id = node.name + ++this[`${node.name}Num`];
        else this[`${node.name}Num`]++;
        if (node.name == "video") {
          if (attrs.width) {
            style = `width:${
              parseFloat(attrs.width) + (attrs.width.includes("%") ? "%" : "px")
            };${style}`;
            attrs.width = void 0;
          }
          if (attrs.height) {
            style = `height:${
              parseFloat(attrs.height) +
              (attrs.height.includes("%") ? "%" : "px")
            };${style}`;
            attrs.height = void 0;
          }
          if (this.videoNum > 3) node.lazyLoad = true;
        }
        attrs.source = [];
        if (attrs.src) attrs.source.push(attrs.src);
        if (!attrs.controls && !attrs.autoplay)
          console.warn(
            `存在没有 controls 属性的 ${node.name} 标签,可能导致无法播放`,
            node
          );
        this.bubble();
        break;
      case "td":
      case "th":
        // Spanning cells: clear the `c` flag on the nearest enclosing table.
        if (attrs.colspan || attrs.rowspan)
          for (let k = this.STACK.length, item; (item = this.STACK[--k]); )
            if (item.name == "table") {
              item.c = void 0;
              break;
            }
    }
    if (attrs.align) {
      styleObj["text-align"] = attrs.align;
      attrs.align = void 0;
    }
    // Compress the style: un-escape entities, then de-duplicate declarations.
    const styles = style
      .replace(/&quot;/g, '"')
      .replace(/&amp;/g, "&")
      .split(";");
    style = "";
    for (let s = 0, len = styles.length; s < len; s++) {
      const info = styles[s].split(":");
      if (info.length < 2) continue;
      const key = info[0].trim().toLowerCase();
      const value = info.slice(1).join(":").trim();
      if (
        value.includes("-webkit") ||
        value.includes("-moz") ||
        value.includes("-ms") ||
        value.includes("-o") ||
        value.includes("safe")
      )
        // Vendor-specific values are kept alongside the standard declaration.
        style += `;${key}:${value}`;
      else if (
        !styleObj[key] ||
        value.includes("import") ||
        !styleObj[key].includes("import")
      )
        // Later declarations win unless the stored one is `!important`.
        styleObj[key] = value;
    }
    if (
      node.name == "img" &&
      parseInt(styleObj.width || attrs.width, 10) > screenWidth
    )
      styleObj.height = "auto";
    for (const key in styleObj) {
      let value = styleObj[key];
      // `align-self` is the actual CSS property (the code used to test the
      // non-existent `self-align`).
      if (key.includes("flex") || key == "order" || key == "align-self")
        node.c = 1;
      if (value.includes("url")) {
        // Complete the link inside url(...).
        let open = value.indexOf("(");
        if (open != -1) {
          open++;
          while (
            value[open] == '"' ||
            value[open] == "'" ||
            blankChar[value[open]]
          )
            open++;
          value = value.substr(0, open) + this.getUrl(value.substr(open));
        }
      } else if (value.includes("rpx"))
        // Convert rpx to px (750rpx spans the screen width).
        value = value.replace(
          /[0-9.]+\s*rpx/g,
          ($) => (parseFloat($) * screenWidth) / 750 + "px"
        );
      else if (key == "white-space" && value.includes("pre"))
        this.pre = node.pre = true;
      style += `;${key}:${value}`;
    }
    style = style.substr(1);
    if (style) attrs.style = style;
  }

  /**
   * Finalizes an element once it has been closed: restores whitespace mode,
   * applies the user filter, renames tags and post-processes lists and
   * tables.
   *
   * @param {object} node - The element just removed from `this.STACK`.
   */
  popNode(node) {
    // Whitespace handling: leave pre mode unless an ancestor is still pre.
    if (node.pre) {
      node.pre = this.pre = void 0;
      for (let i = this.STACK.length; i--; )
        if (this.STACK[i].pre) this.pre = true;
    }
    if (node.name == "head" || (cfg.filter && cfg.filter(node, this) == false))
      return this.siblings().pop();
    const attrs = node.attrs;
    // Replace some tag names.
    if (node.name == "picture") {
      node.name = "img";
      if (!attrs.src && (node.children[0] || "").name == "img")
        attrs.src = node.children[0].attrs.src;
      if (attrs.src && !attrs.ignore) attrs.i = (this.imgNum++).toString();
      return (node.children = void 0);
    }
    if (cfg.blockTags[node.name]) node.name = "div";
    else if (!cfg.trustTags[node.name]) node.name = "span";
    // Lists.
    if (node.c) {
      if (node.name == "ul") {
        // Record the nesting depth on the items of nested lists.
        let floor = 1;
        for (let i = this.STACK.length; i--; )
          if (this.STACK[i].name == "ul") floor++;
        if (floor != 1)
          for (let i = node.children.length; i--; )
            node.children[i].floor = floor;
      } else if (node.name == "ol") {
        // Builds the marker for item `num` according to the list `type`.
        const marker = (num, type) => {
          if (type == "a") return String.fromCharCode(97 + ((num - 1) % 26));
          if (type == "A") return String.fromCharCode(65 + ((num - 1) % 26));
          if (type == "i" || type == "I") {
            // Roman numerals, wrapping after 99.
            num = ((num - 1) % 99) + 1;
            const one = ["I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX"];
            const ten = ["X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC"];
            const res =
              (ten[Math.floor(num / 10) - 1] || "") +
              (one[(num % 10) - 1] || "");
            return type == "i" ? res.toLowerCase() : res;
          }
          return num;
        };
        let num = 1;
        for (let i = 0, child; (child = node.children[i++]); )
          if (child.name == "li") {
            child.type = "ol";
            child.num = marker(num++, attrs.type) + ".";
          }
      }
    }
    // Table borders, spacing and padding.
    if (node.name == "table") {
      let padding = attrs.cellpadding;
      let spacing = attrs.cellspacing;
      const border = attrs.border;
      if (node.c) {
        this.bubble();
        if (!padding) padding = 2;
        if (!spacing) spacing = 2;
      }
      if (border)
        attrs.style = `border:${border}px solid gray;${attrs.style || ""}`;
      if (spacing)
        attrs.style = `border-spacing:${spacing}px;${attrs.style || ""}`;
      if (border || padding)
        (function styleCells(nodes) {
          for (let i = 0, n; (n = nodes[i]); i++) {
            if (n.name == "th" || n.name == "td") {
              // `|| ""` keeps a literal "undefined" out of the style string
              // when the cell has no style of its own.
              if (border)
                n.attrs.style = `border:${border}px solid gray;${
                  n.attrs.style || ""
                }`;
              if (padding)
                n.attrs.style = `padding:${padding}px;${n.attrs.style || ""}`;
            } else styleCells(n.children || []);
          }
        })(node.children);
    }
    this.CssHandler.pop && this.CssHandler.pop(node);
    // Automatic compression: an attribute-less div wrapping a single div is
    // replaced by that inner div.
    if (node.name == "div" && !Object.keys(attrs).length) {
      const siblings = this.siblings();
      if (node.children.length == 1 && node.children[0].name == "div")
        siblings[siblings.length - 1] = node.children[0];
    }
  }

  /**
   * Flags every open ancestor with `c = 1`, walking up from the innermost,
   * and stops at the first tag listed in `cfg.richOnlyTags`.
   *
   * @returns {boolean} false if such a tag was reached, true otherwise.
   */
  bubble() {
    for (let i = this.STACK.length, item; (item = this.STACK[--i]); ) {
      if (cfg.richOnlyTags[item.name]) {
        if (item.name == "table" && !Object.hasOwnProperty.call(item, "c"))
          item.c = 1;
        return false;
      }
      item.c = 1;
    }
    return true;
  }

  /** Returns `val` unchanged in xml mode, lower-cased otherwise. */
  getName = (val) => (this.xml ? val : val.toLowerCase());

  /**
   * Completes a relative or protocol-relative URL using the configured
   * domain / protocol.
   *
   * @param {string} url
   * @returns {string}
   */
  getUrl(url) {
    if (url[0] == "/") {
      // Without a known protocol the result used to be "://host/…";
      // fall back to http so the URL stays usable.
      if (url[1] == "/") url = (this.protocol || "http") + ":" + url;
      else if (this.domain) url = this.domain + url;
    } else if (
      this.domain &&
      url.indexOf("data:") != 0 &&
      !url.includes("://")
    )
      url = this.domain + "/" + url;
    return url;
  }

  /** True when the cursor is on `>` or on the `/` of `/>`. */
  isClose = () =>
    this.data[this.i] == ">" ||
    (this.data[this.i] == "/" && this.data[this.i + 1] == ">");

  /** The input between `start` and the cursor. */
  section = () => this.data.substring(this.start, this.i);

  /** The child list new nodes are appended to: innermost open element or the root. */
  siblings = () =>
    this.STACK.length
      ? this.STACK[this.STACK.length - 1].children
      : this.DOM;

  // ---- State machine -----------------------------------------------------

  /** Text state: looks for the start of a tag, end tag or comment. */
  Text(c) {
    if (c != "<") return;
    const next = this.data[this.i + 1];
    const isLetter = (ch) =>
      (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
    if (isLetter(next)) {
      this.setText();
      this.start = this.i + 1;
      this.state = this.TagName;
    } else if (next == "/") {
      this.setText();
      if (isLetter(this.data[++this.i + 1])) {
        this.start = this.i + 1;
        this.state = this.EndTag;
      } else this.Comment();
    } else if (next == "!") {
      this.setText();
      this.Comment();
    }
  }

  /** Skips a comment, CDATA section or other `<!…>` construct. */
  Comment() {
    let key;
    if (this.data.substring(this.i + 2, this.i + 4) == "--") key = "-->";
    else if (this.data.substring(this.i + 2, this.i + 9) == "[CDATA[")
      key = "]]>";
    else key = ">";
    if ((this.i = this.data.indexOf(key, this.i + 2)) == -1)
      this.i = this.data.length;
    else this.i += key.length - 1;
    this.start = this.i + 1;
    this.state = this.Text;
  }

  /** TagName state: reads the tag name up to a blank or the tag end. */
  TagName(c) {
    if (blankChar[c]) {
      this.tagName = this.section();
      while (blankChar[this.data[this.i]]) this.i++;
      if (this.isClose()) this.setNode();
      else {
        this.start = this.i;
        this.state = this.AttrName;
      }
    } else if (this.isClose()) {
      this.tagName = this.section();
      this.setNode();
    }
  }

  /** AttrName state: reads an attribute name up to `=`, a blank or the tag end. */
  AttrName(c) {
    const blank = blankChar[c];
    if (blank) {
      this.attrName = this.section();
      c = this.data[this.i];
    }
    if (c == "=") {
      if (!blank) this.attrName = this.section();
      while (blankChar[this.data[++this.i]]);
      // Step back one so the main loop's increment lands on the value start.
      this.start = this.i--;
      this.state = this.AttrValue;
    } else if (blank) this.setAttr();
    else if (this.isClose()) {
      this.attrName = this.section();
      this.setAttr();
    }
  }

  /** AttrValue state: reads a quoted or bare attribute value. */
  AttrValue(c) {
    if (c == '"' || c == "'") {
      this.start++;
      // An unterminated quote swallows the rest of the input.
      if ((this.i = this.data.indexOf(c, this.i + 1)) == -1)
        return (this.i = this.data.length);
      this.attrVal = this.section();
      this.i++;
    } else {
      for (; !blankChar[this.data[this.i]] && !this.isClose(); this.i++);
      this.attrVal = this.section();
    }
    this.setAttr();
  }

  /** EndTag state: closes the matching open element, if there is one. */
  EndTag(c) {
    if (blankChar[c] || c == ">" || c == "/") {
      const name = this.getName(this.section());
      let i = this.STACK.length;
      while (i--) if (this.STACK[i].name == name) break;
      if (i != -1) {
        // Elements implicitly closed by this end tag get the same
        // finalization as explicitly closed ones (otherwise e.g. the `pre`
        // flag of an unclosed <pre> would never be reset).
        let node;
        while ((node = this.STACK.pop()).name != name) this.popNode(node);
        this.popNode(node);
      } else if (name == "p" || name == "br")
        // A stray </p> or </br> is treated as an empty element.
        this.siblings().push({
          name,
          attrs: {},
        });
      this.i = this.data.indexOf(">", this.i);
      this.start = this.i + 1;
      if (this.i == -1) this.i = this.data.length;
      else this.state = this.Text;
    }
  }
}

module.exports = MpHtmlParser;