Skip to content

Instantly share code, notes, and snippets.

@claus
Last active April 4, 2022 16:40
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save claus/601d791f423fd48d8bdb7383b2ee2cf5 to your computer and use it in GitHub Desktop.
Save claus/601d791f423fd48d8bdb7383b2ee2cf5 to your computer and use it in GitHub Desktop.
Convert HTML to Contentful Rich Text Document object structure
import { parse as parseHtml } from 'node-html-parser';
import { MARKS, BLOCKS, INLINES } from '@contentful/rich-text-types';
/**
 * Lookup table keyed by lowercase HTML tag name.
 *
 * Each value is `{ type, nodeType }`, where `type` is either 'block' or
 * 'mark' and `nodeType` is the matching constant from
 * `@contentful/rich-text-types`. Tags that are absent from this table
 * (including `<a>`) are handled separately by the converter.
 */
const tagMap = new Map([
    // Tags that become rich-text block nodes.
    ...Object.entries({
        p: BLOCKS.PARAGRAPH,
        h1: BLOCKS.HEADING_1,
        h2: BLOCKS.HEADING_2,
        h3: BLOCKS.HEADING_3,
        h4: BLOCKS.HEADING_4,
        h5: BLOCKS.HEADING_5,
        h6: BLOCKS.HEADING_6,
        ol: BLOCKS.OL_LIST,
        ul: BLOCKS.UL_LIST,
        li: BLOCKS.LIST_ITEM,
        hr: BLOCKS.HR,
        blockquote: BLOCKS.QUOTE,
    }).map(([tag, nodeType]) => [tag, { type: 'block', nodeType }]),
    // Tags that become marks on the text nodes they contain.
    ...Object.entries({
        b: MARKS.BOLD,
        i: MARKS.ITALIC,
        u: MARKS.UNDERLINE,
        strong: MARKS.BOLD,
        em: MARKS.ITALIC,
        code: MARKS.CODE,
    }).map(([tag, nodeType]) => [tag, { type: 'mark', nodeType }]),
]);
/**
 * Converts an HTML string into a rich-text document object.
 *
 * The markup is parsed with `parseHtml` and the resulting root node is
 * handed to `convert`.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {object} Whatever `convert` produces for the parsed root node.
 */
function parse(html) {
    const root = parseHtml(html);
    return convert(root);
}
/**
 * Converts every child of `node` and merges the results into one flat list.
 *
 * @param {object} node - Parsed HTML node exposing `childNodes`.
 * @param {string[]} marks - Mark types to apply to descendant text nodes.
 * @returns {object[]} Converted rich-text nodes; children that convert to
 *   nothing (for example comments) are omitted.
 */
function convertChildren(node, marks) {
    return node.childNodes.flatMap(child => {
        const converted = convert(child, marks);
        // `convert` yields null for unsupported node kinds; an array result
        // (from unwrapped elements) is spliced in by flatMap.
        return converted == null ? [] : converted;
    });
}

/**
 * Recursively converts a parsed HTML node into rich-text node(s).
 *
 * - The root node (an element without a parent) becomes the `document` node.
 * - Elements listed in `tagMap` as blocks become block nodes.
 * - Elements listed in `tagMap` as marks produce no node of their own;
 *   their mark is added to every text node beneath them.
 * - `<a href>` becomes a hyperlink; `<a id>` (without href) becomes an entry
 *   hyperlink whose target entry id is the `id` attribute.
 * - Any other element, including an `<a>` with neither attribute, is
 *   unwrapped: its converted children are returned in its place.
 * - Text nodes become `text` nodes carrying the accumulated marks.
 *
 * @param {object} node - Node from the HTML parser (`nodeType` 1 = element,
 *   3 = text).
 * @param {string[]} [marks=[]] - Mark types inherited from ancestor elements.
 * @returns {object|object[]|null} A single rich-text node, a flat array of
 *   nodes when the element itself is unwrapped, or null for node kinds that
 *   are neither element nor text.
 */
function convert(node, marks = []) {
    if (node.nodeType === 3) {
        return {
            nodeType: 'text',
            value: node.textContent,
            marks: marks.map(type => ({ type })),
            data: {},
        };
    }
    if (node.nodeType !== 1) {
        return null;
    }

    // The parser's root wrapper is the only element without a parent.
    if (node.parentNode == null) {
        return {
            nodeType: 'document',
            content: convertChildren(node, marks),
            data: {},
        };
    }

    const tagName = node.rawTagName.toLowerCase();
    const mapping = tagMap.get(tagName);
    if (mapping !== undefined) {
        if (mapping.type === 'block') {
            return {
                nodeType: mapping.nodeType,
                // Keep inherited marks so that e.g. <b><p>x</p></b> stays bold.
                content: convertChildren(node, marks),
                data: {},
            };
        }
        // Mark element: avoid listing the same mark twice for nested
        // equivalents such as <b><strong>…</strong></b>.
        const nextMarks = marks.includes(mapping.nodeType)
            ? marks
            : [...marks, mapping.nodeType];
        return convertChildren(node, nextMarks);
    }

    if (tagName === 'a') {
        const href = node.getAttribute('href');
        const id = node.getAttribute('id');
        if (href) {
            return {
                nodeType: INLINES.HYPERLINK,
                content: convertChildren(node, marks),
                data: { uri: href },
            };
        }
        if (id) {
            return {
                nodeType: INLINES.ENTRY_HYPERLINK,
                content: convertChildren(node, marks),
                data: {
                    target: {
                        sys: {
                            id,
                            type: 'Link',
                            linkType: 'Entry',
                        },
                    },
                },
            };
        }
        // No href and no id: there is nothing to link to, so fall through
        // and keep only the anchor's contents instead of emitting a link
        // node with empty data.
    }

    return convertChildren(node, marks);
}
export default parse;
@claus
Copy link
Author

claus commented Apr 1, 2022

Usage:

const richTextDoc = parse('<p>The <b>quick brown <i>fox</i></b> jumps over a lazy dog.</p>');

Returns:

{
    "nodeType": "document",
    "content": [
        {
            "nodeType": "paragraph",
            "content": [
                {
                    "nodeType": "text",
                    "value": "The ",
                    "marks": [],
                    "data": {}
                },
                {
                    "nodeType": "text",
                    "value": "quick brown ",
                    "marks": [
                        {
                            "type": "bold"
                        }
                    ],
                    "data": {}
                },
                {
                    "nodeType": "text",
                    "value": "fox",
                    "marks": [
                        {
                            "type": "bold"
                        },
                        {
                            "type": "italic"
                        }
                    ],
                    "data": {}
                },
                {
                    "nodeType": "text",
                    "value": " jumps over a lazy dog.",
                    "marks": [],
                    "data": {}
                }
            ],
            "data": {}
        }
    ],
    "data": {}
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment