From b62c97bfbdca5df254cb1708b8a125c5dc9c1827 Mon Sep 17 00:00:00 2001 From: Artur Arseniev Date: Sat, 27 Apr 2024 16:13:39 +0400 Subject: [PATCH] Refactor HTML parser --- src/dom_components/model/Component.ts | 2 +- src/parser/config/config.ts | 1 + src/parser/index.ts | 2 +- src/parser/model/ParserHtml.ts | 238 +++++++++++--------------- test/specs/parser/model/ParserHtml.ts | 2 +- 5 files changed, 107 insertions(+), 138 deletions(-) diff --git a/src/dom_components/model/Component.ts b/src/dom_components/model/Component.ts index 3f54433ba..ab2f1028f 100644 --- a/src/dom_components/model/Component.ts +++ b/src/dom_components/model/Component.ts @@ -2039,7 +2039,7 @@ export default class Component extends StyleableModel { return result(this.prototype, 'defaults'); } - static isComponent(el: HTMLElement): ComponentDefinitionDefined | boolean | undefined { + static isComponent(el: HTMLElement, opts?: any): ComponentDefinitionDefined | boolean | undefined { return { tagName: toLowerCase(el.tagName) }; } diff --git a/src/parser/config/config.ts b/src/parser/config/config.ts index 06ea3b856..060747fa5 100644 --- a/src/parser/config/config.ts +++ b/src/parser/config/config.ts @@ -23,6 +23,7 @@ export interface HTMLParseResult { export interface ParseNodeOptions extends HTMLParserOptions { inSvg?: boolean; + skipChildren?: boolean; } export interface HTMLParserOptions { diff --git a/src/parser/index.ts b/src/parser/index.ts index e33775a42..c7c8b6048 100644 --- a/src/parser/index.ts +++ b/src/parser/index.ts @@ -69,7 +69,7 @@ export default class ParserModule extends Module { return { - compTypes: '', + compTypes: [] as ComponentStackItem[], modelAttrStart, @@ -133,7 +133,6 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo const nodeName = attrs[i].nodeName; let nodeValue: string | boolean = attrs[i].nodeValue!; - // Isolate attributes if (nodeName == 'style') { model.style = this.parseStyle(nodeValue); } else if (nodeName == 'class') { @@ 
-160,15 +159,105 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
       return model;
     },
 
+    /**
+     * Detect the component definition of a node.
+     * @param node Node to check
+     * @param opts Options passed to the `isComponent` of each component type
+     * @returns The detected definition, an empty object if nothing matches
+     */
+    detectNode(node: HTMLElement, opts: ParseNodeOptions = {}): ComponentDefinitionDefined {
+      const { compTypes } = this;
+      if (!compTypes) return {};
+
+      // If the type is already defined, use it
+      const type = node.getAttribute?.(`${this.modelAttrStart}type`);
+      if (type) return { type };
+
+      // Find the component type: the first one accepting the node wins
+      for (let i = 0; i < compTypes.length; i++) {
+        const compType = compTypes[i];
+        const obj = compType.model.isComponent(node, opts);
+
+        if (obj) {
+          return typeof obj === 'object' ? obj : { type: compType.id };
+        }
+      }
+
+      // No match (eg. empty `compTypes`): return an empty definition, never `undefined`
+      return {};
+    },
+
+    /**
+     * Parse a node into a component definition: type, tag, attributes and children.
+     * With `opts.skipChildren` the child nodes are not parsed.
+     */
     parseNode(node: HTMLElement, opts: ParseNodeOptions = {}) {
-      const result = this.parseNodeAttr(node);
       const nodes = node.childNodes;
+      const nodesLen = nodes.length;
+      let model = this.detectNode(node, opts);
 
-      if (!!nodes.length) {
-        result.components = this.parseNodes(node, opts);
+      if (!model.tagName) {
+        const tag = node.tagName || '';
+        const ns = node.namespaceURI || '';
+        model.tagName = tag && ns === 'http://www.w3.org/1999/xhtml' ? tag.toLowerCase() : tag;
       }
 
-      return result;
+      model = this.parseNodeAttr(node, model);
+
+      // Check for custom void elements (valid in XML)
+      if (!nodesLen && `${node.outerHTML}`.slice(-2) === '/>') {
+        model.void = true;
+      }
+
+      // Check for nested elements but avoid it if already provided
+      if (nodesLen && !model.components && !opts.skipChildren) {
+        // Avoid infinite nested text nodes
+        const firstChild = nodes[0];
+
+        // If there is only one child and it's a TEXTNODE
+        // just make it content of the current node
+        if (nodesLen === 1 && firstChild.nodeType === 3) {
+          if (!model.type) model.type = 'text';
+          model.components = {
+            type: 'textnode',
+            content: firstChild.nodeValue,
+          };
+        } else {
+          model.components = this.parseNodes(node, {
+            ...opts,
+            inSvg: opts.inSvg || model.type === 'svg',
+          });
+        }
+      }
+
+      // If all children are texts and there is any textnode inside, the parent should
+      // be text too otherwise it won't be possible to edit textnodes.
+      const comps = model.components;
+      if (!model.type && comps?.length) {
+        const { textTypes = [], textTags = [] } = config;
+        let allTxt = true;
+        let foundTextNode = false;
+
+        for (let i = 0; i < comps.length; i++) {
+          const comp = comps[i];
+          const cType = comp.type;
+
+          if (!textTypes.includes(cType) && !textTags.includes(comp.tagName)) {
+            allTxt = false;
+            break;
+          }
+
+          if (cType === 'textnode') {
+            foundTextNode = true;
+          }
+        }
+
+        if (allTxt && foundTextNode) {
+          model.type = 'text';
+        }
+      }
+
+      return model;
     },
 
     /**
@@ -179,105 +268,16 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
     parseNodes(el: HTMLElement, opts: ParseNodeOptions = {}) {
       const result: ComponentDefinitionDefined[] = [];
       const nodes = el.childNodes;
+      const nodesLen = nodes.length;
 
-      for (var i = 0, len = nodes.length; i < len; i++) {
+      for (let i = 0; i < nodesLen; i++) {
         const node = nodes[i] as HTMLElement;
-        const attrs = node.attributes || [];
-        const attrsLen = attrs.length;
         const nodePrev =
result[result.length - 1]; - const nodeChild = node.childNodes.length; - const ct = this.compTypes; - let model: ComponentDefinitionDefined = {}; // TODO use component properties - - // Start with understanding what kind of component it is - if (ct) { - let obj: any = ''; - let type = node.getAttribute && node.getAttribute(`${this.modelAttrStart}type`); - - // If the type is already defined, use it - if (type) { - model = { type }; - } else { - // Iterate over all available Component Types and - // the first with a valid result will be that component - for (let it = 0; it < ct.length; it++) { - const compType = ct[it]; - // @ts-ignore - obj = compType.model.isComponent(node, opts); - - if (obj) { - if (typeof obj !== 'object') { - // @ts-ignore - obj = { type: compType.id }; - } - break; - } - } - - model = obj; - } - } - - // Set tag name if not yet done - if (!model.tagName) { - const tag = node.tagName || ''; - const ns = node.namespaceURI || ''; - model.tagName = tag && ns === 'http://www.w3.org/1999/xhtml' ? tag.toLowerCase() : tag; - } - - if (attrsLen) { - model.attributes = {}; - } - - // Parse attributes - for (let j = 0; j < attrsLen; j++) { - const nodeName = attrs[j].nodeName; - let nodeValue: string | boolean = attrs[j].nodeValue!; - - // Isolate attributes - if (nodeName == 'style') { - model.style = this.parseStyle(nodeValue); - } else if (nodeName == 'class') { - model.classes = this.parseClass(nodeValue); - } else if (nodeName == 'contenteditable') { - continue; - } else if (nodeName.indexOf(this.modelAttrStart) === 0) { - const propsResult = this.getPropAttribute(nodeName, nodeValue); - model[propsResult.name] = propsResult.value; - } else { - // @ts-ignore Check for attributes from props (eg. 
required, disabled) - if (nodeValue === '' && node[nodeName] === true) { - nodeValue = true; - } + const model = this.parseNode(node, opts); - model.attributes[nodeName] = nodeValue; - } - } - - // Check for nested elements but avoid it if already provided - if (nodeChild && !model.components) { - // Avoid infinite nested text nodes - const firstChild = node.childNodes[0]; - - // If there is only one child and it's a TEXTNODE - // just make it content of the current node - if (nodeChild === 1 && firstChild.nodeType === 3) { - !model.type && (model.type = 'text'); - model.components = { - type: 'textnode', - content: firstChild.nodeValue, - }; - } else { - model.components = this.parseNodes(node, { - ...opts, - inSvg: opts.inSvg || model.type === 'svg', - }); - } - } - - // Check if it's a text node and if could be moved to the prevous model - if (model.type == 'textnode') { - if (nodePrev && nodePrev.type == 'textnode') { + // Check if it's a text node and if it could be moved to the prevous one + if (model.type === 'textnode') { + if (nodePrev?.type === 'textnode') { nodePrev.content += model.content; continue; } @@ -291,39 +291,7 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo } } - // Check for custom void elements (valid in XML) - if (!nodeChild && `${node.outerHTML}`.slice(-2) === '/>') { - model.void = true; - } - - // If all children are texts and there is some textnode the parent should - // be text too otherwise I'm unable to edit texnodes - const comps = model.components; - if (!model.type && comps) { - const { textTypes = [], textTags = [] } = config; - let allTxt = 1; - let foundTextNode = 0; - - for (let ci = 0; ci < comps.length; ci++) { - const comp = comps[ci]; - const cType = comp.type; - - if (!textTypes.includes(cType) && !textTags.includes(comp.tagName)) { - allTxt = 0; - break; - } - - if (cType === 'textnode') { - foundTextNode = 1; - } - } - - if (allTxt && foundTextNode) { - model.type = 'text'; - } - } - 
- // If tagName is still empty and is not a textnode, do not push it + // If the tagName is empty and it's not a textnode, skip it if (!model.tagName && isUndefined(model.content)) { continue; } diff --git a/test/specs/parser/model/ParserHtml.ts b/test/specs/parser/model/ParserHtml.ts index c3d2ca3d0..96e0b8b2a 100644 --- a/test/specs/parser/model/ParserHtml.ts +++ b/test/specs/parser/model/ParserHtml.ts @@ -15,7 +15,7 @@ describe('ParserHtml', () => { textTypes: ['text', 'textnode', 'comment'], returnArray: true, }); - obj.compTypes = dom.componentTypes as any; + obj.compTypes = dom.componentTypes; }); test('Simple div node', () => {