Browse Source

Add asDocument to HTML parser

pull/5895/head
Artur Arseniev 2 years ago
parent
commit
12b85f06c3
  1. 1
      src/dom_components/model/ComponentWrapper.ts
  2. 20
      src/parser/config/config.ts
  3. 2
      src/parser/model/BrowserParserHtml.ts
  4. 107
      src/parser/model/ParserHtml.ts
  5. 9
      src/utils/dom.ts
  6. 64
      test/specs/parser/model/ParserHtml.ts

1
src/dom_components/model/ComponentWrapper.ts

@ -13,7 +13,6 @@ export default class ComponentWrapper extends Component {
draggable: false, draggable: false,
components: [], components: [],
traits: [], traits: [],
// In case we might need the doctype as component https://stackoverflow.com/a/10162353
doctype: '', doctype: '',
head: null, head: null,
stylable: [ stylable: [

20
src/parser/config/config.ts

@ -1,3 +1,5 @@
import { CssRuleJSON } from '../../css_composer/model/CssRule';
import { ComponentDefinitionDefined } from '../../dom_components/model/types';
import Editor from '../../editor'; import Editor from '../../editor';
export interface ParsedCssRule { export interface ParsedCssRule {
@ -11,6 +13,18 @@ export type CustomParserCss = (input: string, editor: Editor) => ParsedCssRule[]
export type CustomParserHtml = (input: string, options: HTMLParserOptions) => HTMLElement; export type CustomParserHtml = (input: string, options: HTMLParserOptions) => HTMLElement;
/**
 * Result object produced by the HTML parser's `parse` method.
 */
export interface HTMLParseResult {
// Parsed component definition(s): a single definition or an array of them.
html: ComponentDefinitionDefined | ComponentDefinitionDefined[];
// CSS rules returned by the CSS parser for the content of detached <style> tags (only set when any style text was found).
css?: CssRuleJSON[];
// Serialized doctype string; only populated when parsing with `asDocument`.
doctype?: string;
// Definition built from the attributes of the document root element; only populated when parsing with `asDocument`.
root?: ComponentDefinitionDefined;
// Definition built from the document <head>; only populated when parsing with `asDocument`.
head?: ComponentDefinitionDefined;
}
/**
 * Options passed along while traversing DOM nodes (parser options plus traversal state).
 */
export interface ParseNodeOptions extends HTMLParserOptions {
// Set by the parser when recursing into the children of a node whose model type is 'svg' (or when already inside one).
inSvg?: boolean;
}
export interface HTMLParserOptions { export interface HTMLParserOptions {
/** /**
* DOMParser mime type. * DOMParser mime type.
@ -20,6 +34,12 @@ export interface HTMLParserOptions {
*/ */
htmlType?: DOMParserSupportedType; htmlType?: DOMParserSupportedType;
/**
* Parse the string as an HTML document. The result will include additional data (e.g. doctype, head, etc.)
* @default false
*/
asDocument?: boolean;
/** /**
* Allow <script> tags. * Allow <script> tags.
* @default false * @default false

2
src/parser/model/BrowserParserHtml.ts

@ -13,6 +13,8 @@ export default (str: string, config: HTMLParserOptions = {}) => {
let res: HTMLElement; let res: HTMLElement;
if (toHTML) { if (toHTML) {
if (config.asDocument) return doc;
// Replicate the old parser in order to avoid breaking changes // Replicate the old parser in order to avoid breaking changes
const { head, body } = doc; const { head, body } = doc;
// Move all scripts at the bottom of the page // Move all scripts at the bottom of the page

107
src/parser/model/ParserHtml.ts

@ -1,17 +1,10 @@
import { each, isArray, isFunction, isUndefined } from 'underscore'; import { each, isArray, isFunction, isUndefined } from 'underscore';
import { ObjectAny } from '../../common'; import { ObjectAny, ObjectStrings } from '../../common';
import { CssRuleJSON } from '../../css_composer/model/CssRule';
import { ComponentDefinitionDefined } from '../../dom_components/model/types'; import { ComponentDefinitionDefined } from '../../dom_components/model/types';
import EditorModel from '../../editor/model/Editor'; import EditorModel from '../../editor/model/Editor';
import { HTMLParserOptions, ParserConfig } from '../config/config'; import { HTMLParseResult, HTMLParserOptions, ParseNodeOptions, ParserConfig } from '../config/config';
import BrowserParserHtml from './BrowserParserHtml'; import BrowserParserHtml from './BrowserParserHtml';
import { doctypeToString } from '../../utils/dom';
type StringObject = Record<string, string>;
type HTMLParseResult = {
html: ComponentDefinitionDefined | ComponentDefinitionDefined[]; // TODO replace with components
css?: CssRuleJSON[];
};
const modelAttrStart = 'data-gjs-'; const modelAttrStart = 'data-gjs-';
const event = 'parse:html'; const event = 'parse:html';
@ -50,7 +43,7 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
*/ */
splitPropsFromAttr(attr: ObjectAny = {}) { splitPropsFromAttr(attr: ObjectAny = {}) {
const props: ObjectAny = {}; const props: ObjectAny = {};
const attrs: StringObject = {}; const attrs: ObjectStrings = {};
each(attr, (value, key) => { each(attr, (value, key) => {
if (key.indexOf(this.modelAttrStart) === 0) { if (key.indexOf(this.modelAttrStart) === 0) {
@ -131,12 +124,59 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
return result; return result;
}, },
/**
 * Read the attributes of a DOM node into a component definition.
 * `style` and `class` are parsed into `style`/`classes`, `contenteditable` is dropped,
 * attributes starting with the model prefix become definition props, and everything
 * else is stored under `attributes`.
 * @param {HTMLElement} node DOM element to read the attributes from
 * @param {Object} [result] Existing definition to extend; a new object is used if omitted
 * @return {Object} The (extended) component definition
 */
parseNodeAttr(node: HTMLElement, result?: ComponentDefinitionDefined) {
  const model = result || {};
  const attrs = node.attributes || [];
  const total = attrs.length;

  for (let idx = 0; idx < total; idx++) {
    const attr = attrs[idx];
    const name = attr.nodeName;
    const value: string = attr.nodeValue!;

    if (name === 'style') {
      model.style = this.parseStyle(value);
      continue;
    }

    if (name === 'class') {
      model.classes = this.parseClass(value);
      continue;
    }

    // Editing state should never leak into the definition
    if (name === 'contenteditable') {
      continue;
    }

    if (name.indexOf(this.modelAttrStart) === 0) {
      const { name: propName, value: propValue } = this.getPropAttribute(name, value);
      model[propName] = propValue;
      continue;
    }

    // @ts-ignore Check for attributes from props (eg. required, disabled)
    const isBooleanProp = value === '' && node[name] === true;
    const finalValue: string | boolean = isBooleanProp ? true : value;

    if (!model.attributes) {
      model.attributes = {};
    }

    model.attributes[name] = finalValue;
  }

  return model;
},
/**
 * Build the definition of a single element: its own attributes and,
 * when it has any child nodes, the parsed children as `components`.
 * @param {HTMLElement} node DOM element to convert
 * @param {Object} [opts] Options forwarded to `parseNodes`
 * @return {Object} Component definition of the node
 */
parseNode(node: HTMLElement, opts: ParseNodeOptions = {}) {
  const definition = this.parseNodeAttr(node);

  if (node.childNodes.length > 0) {
    definition.components = this.parseNodes(node, opts);
  }

  return definition;
},
/** /**
* Get data from the node element * Get data from the node element
* @param {HTMLElement} el DOM element to traverse * @param {HTMLElement} el DOM element to traverse
* @return {Array<Object>} * @return {Array<Object>}
*/ */
parseNode(el: HTMLElement, opts: ObjectAny = {}) { parseNodes(el: HTMLElement, opts: ParseNodeOptions = {}) {
const result: ComponentDefinitionDefined[] = []; const result: ComponentDefinitionDefined[] = [];
const nodes = el.childNodes; const nodes = el.childNodes;
@ -228,7 +268,7 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
content: firstChild.nodeValue, content: firstChild.nodeValue,
}; };
} else { } else {
model.components = this.parseNode(node, { model.components = this.parseNodes(node, {
...opts, ...opts,
inSvg: opts.inSvg || model.type === 'svg', inSvg: opts.inSvg || model.type === 'svg',
}); });
@ -303,17 +343,25 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
parse(str: string, parserCss?: any, opts: HTMLParserOptions = {}) { parse(str: string, parserCss?: any, opts: HTMLParserOptions = {}) {
const conf = em?.get('Config') || {}; const conf = em?.get('Config') || {};
const res: HTMLParseResult = { html: [] }; const res: HTMLParseResult = { html: [] };
const cf: ObjectAny = { ...config, ...opts }; const cf = { ...config, ...opts };
const options = { const options = {
...config.optionsHtml, ...config.optionsHtml,
// @ts-ignore Support previous `configParser.htmlType` option // @ts-ignore Support previous `configParser.htmlType` option
htmlType: config.optionsHtml?.htmlType || config.htmlType, htmlType: config.optionsHtml?.htmlType || config.htmlType,
...opts, ...opts,
}; };
const { preParser } = options; const { preParser, asDocument } = options;
const input = isFunction(preParser) ? preParser(str, { editor: em?.getEditor()! }) : str; const input = isFunction(preParser) ? preParser(str, { editor: em?.getEditor()! }) : str;
const el = isFunction(cf.parserHtml) ? cf.parserHtml(input, options) : BrowserParserHtml(input, options); const parseRes = isFunction(cf.parserHtml) ? cf.parserHtml(input, options) : BrowserParserHtml(input, options);
const scripts = el.querySelectorAll('script'); let root = parseRes as HTMLElement;
const docEl = parseRes as Document;
if (asDocument) {
root = docEl.documentElement;
res.doctype = doctypeToString(docEl.doctype);
}
const scripts = root.querySelectorAll('script');
let i = scripts.length; let i = scripts.length;
// Support previous `configMain.allowScripts` option // Support previous `configMain.allowScripts` option
@ -321,32 +369,41 @@ const ParserHtml = (em?: EditorModel, config: ParserConfig & { returnArray?: boo
// Remove script tags // Remove script tags
if (!allowScripts) { if (!allowScripts) {
while (i--) scripts[i].parentNode.removeChild(scripts[i]); while (i--) scripts[i].parentNode?.removeChild(scripts[i]);
} }
// Remove unsafe attributes // Remove unsafe attributes
if (!options.allowUnsafeAttr || !options.allowUnsafeAttrValue) { if (!options.allowUnsafeAttr || !options.allowUnsafeAttrValue) {
this.__sanitizeNode(el, options); this.__sanitizeNode(root, options);
} }
// Detach style tags and parse them // Detach style tags and parse them
if (parserCss) { if (parserCss) {
const styles = el.querySelectorAll('style'); const styles = root.querySelectorAll('style');
let j = styles.length; let j = styles.length;
let styleStr = ''; let styleStr = '';
while (j--) { while (j--) {
styleStr = styles[j].innerHTML + styleStr; styleStr = styles[j].innerHTML + styleStr;
styles[j].parentNode.removeChild(styles[j]); styles[j].parentNode?.removeChild(styles[j]);
} }
if (styleStr) res.css = parserCss.parse(styleStr); if (styleStr) res.css = parserCss.parse(styleStr);
} }
em?.trigger(`${event}:root`, { input, root: el }); em?.trigger(`${event}:root`, { input, root: root });
const result = this.parseNode(el, cf); let resHtml: HTMLParseResult['html'] = [];
// I have to keep it otherwise it breaks the DomComponents.addComponent (returns always array)
const resHtml = result.length === 1 && !cf.returnArray ? result[0] : result; if (asDocument) {
res.head = this.parseNode(docEl.head, cf);
res.root = this.parseNodeAttr(root);
resHtml = this.parseNode(docEl.body, cf);
} else {
const result = this.parseNodes(root, cf);
// I have to keep it otherwise it breaks the DomComponents.addComponent (returns always array)
resHtml = result.length === 1 && !cf.returnArray ? result[0] : result;
}
res.html = resHtml; res.html = resHtml;
em?.trigger(event, { input, output: res }); em?.trigger(event, { input, output: res });

9
src/utils/dom.ts

@ -205,6 +205,15 @@ export const hasCtrlKey = (ev: WheelEvent) => ev.ctrlKey;
export const hasModifierKey = (ev: WheelEvent) => hasCtrlKey(ev) || ev.metaKey; export const hasModifierKey = (ev: WheelEvent) => hasCtrlKey(ev) || ev.metaKey;
/**
 * Serialize a DocumentType node back to its `<!DOCTYPE ...>` declaration.
 * Ref: https://stackoverflow.com/a/10162353
 *
 * @param dt Doctype node (eg. `document.doctype`), may be null/undefined.
 * @returns The doctype declaration, or an empty string when no doctype is given.
 */
export const doctypeToString = (dt?: DocumentType | null) => {
  if (!dt) return '';
  const { name, publicId, systemId } = dt;
  const pubId = publicId ? ` PUBLIC "${publicId}"` : '';
  // The SYSTEM keyword is only needed when there is no public identifier,
  // but the system identifier itself has to be emitted in both cases.
  const sysKeyword = !publicId && systemId ? ' SYSTEM' : '';
  const sysId = systemId ? ` "${systemId}"` : '';
  return `<!DOCTYPE ${name}${pubId}${sysKeyword}${sysId}>`;
};
export const on = <E extends Event = Event>( export const on = <E extends Event = Event>(
el: EventTarget | EventTarget[], el: EventTarget | EventTarget[],
ev: string, ev: string,

64
test/specs/parser/model/ParserHtml.ts

@ -654,5 +654,69 @@ describe('ParserHtml', () => {
const preParser = (str: string) => str.replace('javascript:', 'test:'); const preParser = (str: string) => str.replace('javascript:', 'test:');
expect(obj.parse(str, null, { preParser }).html).toEqual([result]); expect(obj.parse(str, null, { preParser }).html).toEqual([result]);
}); });
// Verifies that `asDocument: true` returns doctype, root (<html>), head and body data.
// Registered with plain `test` (not `test.only`) so the rest of the suite keeps running.
test('parsing as document', () => {
const str = `
<!DOCTYPE html>
<html class="cls-html" lang="en" data-gjs-htmlp="true">
<head class="cls-head" data-gjs-headp="true">
<meta charset="utf-8">
<title>Test</title>
<link rel="stylesheet" href="/noop.css">
<!-- comment -->
<script src="/noop.js"></script>
<style>.test { color: red }</style>
</head>
<body class="cls-body" data-gjs-bodyp="true">
<h1>H1</h1>
</body>
</html>
`;
// asDocument: true
expect(obj.parse(str, null, { asDocument: true })).toEqual({
doctype: '<!DOCTYPE html>',
root: { classes: ['cls-html'], attributes: { lang: 'en' }, htmlp: true },
head: {
type: 'head',
headp: true,
classes: ['cls-head'],
components: [
{ tagName: 'meta', attributes: { charset: 'utf-8' } },
{
tagName: 'title',
type: 'text',
components: { type: 'textnode', content: 'Test' },
},
{
tagName: 'link',
attributes: { rel: 'stylesheet', href: '/noop.css' },
},
{
type: 'comment',
tagName: '',
content: ' comment ',
},
{
tagName: 'style',
type: 'text',
components: { type: 'textnode', content: '.test { color: red }' },
},
],
},
html: {
tagName: 'body',
bodyp: true,
classes: ['cls-body'],
components: [
{
tagName: 'h1',
type: 'text',
components: { type: 'textnode', content: 'H1' },
},
],
},
});
});
}); });
}); });

Loading…
Cancel
Save