Skip to content

Instantly share code, notes, and snippets.

@kmelve
Created September 15, 2021 21:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmelve/06d0338f2c227fcfa7f987cc32e4e546 to your computer and use it in GitHub Desktop.
Save kmelve/06d0338f2c227fcfa7f987cc32e4e546 to your computer and use it in GitHub Desktop.
Deserializing Transcript HTML into Portable Text
const blockTools = require("@sanity/block-tools").default;
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
// Lookup table keyed by lower-case HTML tag name; `span` is tagged as a
// `text` object. NOTE(review): nothing in the visible code reads this
// constant — confirm whether it is still needed.
const HTML_SPAN_TAGS = {
  span: { object: "text" },
};
const defaultSchema = require("./defaultSchema");
/**
 * Returns the lower-cased tag name of a DOM element node.
 *
 * @param {*} el - Candidate node; may be null/undefined or a non-element node.
 * @returns {string|undefined} The lower-cased `tagName` when `el` is truthy
 *   and has `nodeType === 1` (element node); `undefined` otherwise.
 */
function tagName(el) {
  const isElementNode = Boolean(el) && el.nodeType === 1;
  return isElementNode ? el.tagName.toLowerCase() : undefined;
}
// Schema type of the `transcript` field on the `blogPost` type, looked up
// once at module load; parseHTML passes it to block-tools as the target
// block content type.
const { type: blockContentType } = defaultSchema
  .get("blogPost")
  .fields.find(({ name }) => name === "transcript");
/**
 * Deserialization rule for block-tools: converts `<span begin="...">`
 * elements into `timestamp` annotations.
 *
 * Rules can be handed non-element nodes (text, comments) that have no
 * `tagName`, so the element check goes through `tagName()` rather than
 * reading `el.tagName` directly, which would throw on such nodes.
 *
 * @param {object} el - DOM node currently being deserialized.
 * @param {Function} next - Callback that deserializes a list of child nodes.
 * @returns {object|Array|undefined} `undefined` when the node is not a
 *   `<span>` (lets other rules handle it); the deserialized children when the
 *   span has no `begin` attribute; otherwise an `__annotation` object whose
 *   mark definition carries the `begin` value as `time`.
 */
function deserializeTimestampSpan(el, next) {
  if (tagName(el) !== "span") {
    return undefined;
  }

  const begin = el.getAttribute("begin");
  if (!begin) {
    // A span without a timestamp contributes only its children.
    return next(el.childNodes);
  }

  return {
    _type: "__annotation",
    markDef: {
      _key: blockTools.randomKey(),
      _type: "timestamp",
      time: begin,
    },
    children: next(el.childNodes),
  };
}

/**
 * Converts a transcript HTML string into blocks using block-tools, with
 * `<span begin="...">` elements mapped to `timestamp` annotations.
 *
 * @param {string} HTMLDoc - HTML markup to convert.
 * @returns {Array} Result of `blockTools.htmlToBlocks`, or an empty array
 *   when `HTMLDoc` is falsy (empty string, null, undefined).
 */
function parseHTML(HTMLDoc) {
  if (!HTMLDoc) {
    return [];
  }

  const rules = [{ deserialize: deserializeTimestampSpan }];

  return blockTools.htmlToBlocks(HTMLDoc, blockContentType, {
    rules,
    // Since we're in a Node context, block-tools needs JSDOM in order to
    // parse the HTML into DOM elements.
    parseHtml: (html) => new JSDOM(html).window.document,
  });
}
module.exports = parseHTML;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment