Skip to content

Instantly share code, notes, and snippets.

@herrstrietzel
Last active April 10, 2024 17:01
Show Gist options
  • Save herrstrietzel/aefd72c17919522dc39ebbee481986ea to your computer and use it in GitHub Desktop.
Save herrstrietzel/aefd72c17919522dc39ebbee481986ea to your computer and use it in GitHub Desktop.
SO: HTML text to SVG
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Convert HTML text to SVG</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Serif:ital,wdth,wght@0,62.5..100,100..900;1,62.5..100,100..900&amp;display=swap" rel="stylesheet" />
<link rel="stylesheet" href="./style.css">
</head>
<body>
<h1>Convert HTML text to SVG</h1>
<p><label for="inpWidth">Text width:</label><input id="inpWidth" value="50" min="10" max="100" type="range" step="1"></p>
<h3>html text</h3>
<div lang="de" class="foreignBody resize" xmlns="http://www.w3.org/1999/xhtml" style="margin:0em; padding:1em">
<p style="text-align:center" class="author">Franz Kafka</p>
<h1 style="text-align:center">The Metamorphosis</h1>
<div class="columns">
<p>One morning, when <strong>Gregor Samsa</strong> woke from <em>troubled</em> dreams, he found
himself
transformed in
his bed into <em style="color:red; letter-spacing:0.1em">a horrible</em>
vermin.<sup>1</sup>
</p>
<p>He lay on his armour-like back, and if he lifted his head a little he could see his brown belly,
slightly
domed and divided by arches <strong><a href="https://stackoverflow.com/" style="color:green">link: into
stiff
sections.</a> </strong> The bed&shy;ding was hardly
able to
cover it and seemed ready to slide off any moment.</p>
</div>
</div>
<h3>Output (svg)</h3>
<div id="svgWrp"></div>
<h3>SVG markup</h3>
<textarea id="output"></textarea>
<script src="script.js"></script>
<script>
// Wait for fonts: the conversion measures rendered text, so it only starts
// once document.fonts.ready has resolved.
document.fonts.ready
  .then(() => {
    // Look the demo elements up explicitly instead of relying on the implicit
    // window.<id> globals the browser creates for elements with an id.
    const htmlEl = document.querySelector(".foreignBody");
    const widthInput = document.getElementById("inpWidth");
    const svgWrapper = document.getElementById("svgWrp");
    const markupOutput = document.getElementById("output");

    // Collected once and reused for every update.
    const textNodes = getTextNodesInEL(htmlEl);

    /**
     * Re-render the SVG preview and refresh the markup textarea.
     */
    const updateSVG = () => {
      const svg = html2SvgText(htmlEl, textNodes);
      svgWrapper.replaceChildren(svg);

      // Serialize once; the raw string is logged, a line-broken copy is shown.
      const rawMarkup = new XMLSerializer().serializeToString(svg);
      markupOutput.value = rawMarkup
        .replaceAll('&quot;', '\'')
        .replaceAll('</', '\n</')
        .replaceAll('><', '>\n<');
      console.log(rawMarkup);
    };

    // init
    htmlEl.style.width = `${widthInput.value}%`;
    updateSVG();

    // resize by range slider
    widthInput.addEventListener("input", (e) => {
      htmlEl.style.width = `${e.currentTarget.value}%`;
      updateSVG();
    });

    // resize: the .resize box is user-resizable, a drag ends with a mouseup
    htmlEl.addEventListener("mouseup", () => {
      updateSVG();
    });
  })
  .catch((err) => {
    console.error("HTML to SVG conversion failed", err);
  });
</script>
</body>
</html>
/**
 * Convert the text of an HTML element to an SVG rendition.
 *
 * Each text node is whitespace-normalised in place, its parent element gets a
 * `data-id` of the form `<tagname>_<index>`, and the node is measured with
 * getTextNodeBboxes(). The measured fragments are then shifted into the
 * coordinate system of `htmlEl`, numbered by line and passed to
 * renderSVGText().
 *
 * @param {Element} htmlEl - container whose text is converted
 * @param {Text[]|null} [textNodes=null] - text nodes to process; when null
 *   they are collected from `htmlEl` with getTextNodesInEL()
 * @returns {*} the value returned by renderSVGText()
 */
function html2SvgText(htmlEl, textNodes = null) {
  // text nodes: use the ones handed in, otherwise walk the element
  if (textNodes === null) {
    textNodes = getTextNodesInEL(htmlEl);
  }

  // turn line breaks/tabs into spaces, collapse space runs and leave exactly
  // one trailing space (mutates the text node)
  const normalizeSpaces = (textNode) => {
    const singleLine = textNode.nodeValue.replace(/[\n\r\t]/g, " ");
    const collapsed = singleLine.replace(/\ {2,}/g, " ");
    textNode.nodeValue = collapsed.trim() + " ";
    return textNode;
  };

  // container box: its x/y become the origin of all svg coordinates
  const containerBox = htmlEl.getBoundingClientRect();

  // collected fragments plus the style properties considered when rendering
  const textNodeObj = {
    xOffset: containerBox.x,
    yOffset: containerBox.y,
    width: containerBox.width,
    height: containerBox.height,
    textNodeData: [],
    // per property: the values treated as defaults
    styleProps: {
      fontFamily: [],
      fontSize: [16],
      fontWeight: ["400", "normal"],
      fontStyle: ["normal"],
      fontStretch: ["100%"],
      color: ["rgb(0, 0, 0)"],
      letterSpacing: ["normal"],
      textDecoration: ["none", "none solid rgb(0, 0, 0)"],
      textTransform: ["none"]
    }
  };

  // measure every text node
  textNodes.forEach((textNode, index) => {
    normalizeSpaces(textNode);
    // tag the parent so fragments can be traced back to their element
    const owner = textNode.parentElement;
    owner.dataset.id = `${owner.nodeName.toLowerCase()}_${index}`;
    getTextNodeBboxes(textNode, textNodeObj);
  });

  // translate viewport coordinates to container-relative baseline positions
  const { xOffset, yOffset, textNodeData } = textNodeObj;
  let lineNum = 1;
  for (let index = 0; index < textNodeData.length; index++) {
    const line = textNodeData[index];
    // approximated descender height: a quarter of the font size
    const descender = line.style.fontSize * 0.25;
    line.x -= xOffset;
    line.y = line.y - yOffset + line.height - descender;

    // a fragment lower than its predecessor starts a new line
    const previous = textNodeData[index > 0 ? index - 1 : index];
    if (line.y > previous.y) {
      lineNum += 1;
    }
    line.lineNum = lineNum;
  }

  // render svg
  const svgEl = renderSVGText(textNodeObj);
  console.log(textNodeObj);
  return svgEl;
}
/**
 * Measure one text node and append its fragments to
 * `textNodeObj.textNodeData`.
 *
 * A node that renders on a single line yields one fragment. A node spanning
 * several lines is measured word by word: words on the same line are merged
 * into one fragment, and a word that itself wraps is probed character by
 * character and split where the vertical position changes.
 *
 * Each fragment has the shape
 * `{ text, x, y, height, style, hyphenated, parentId, href }`; x/y are the
 * viewport coordinates reported by Range.getBoundingClientRect(). All
 * fragments of a node share one `style` object.
 *
 * @param {Text} node - text node to measure; its parent element must carry a
 *   `data-id` of the form `<tagname>_<index>`. For the word offsets to line up
 *   the node value is expected to hold single-space separated words without
 *   leading whitespace.
 * @param {{textNodeData: Array<object>, styleProps: object}} textNodeObj -
 *   collector; the keys of `styleProps` name the computed style properties
 *   copied onto each fragment
 * @returns {false|undefined} false when the node is whitespace-only (and has
 *   no line break) and was skipped, otherwise undefined
 */
function getTextNodeBboxes(node, textNodeObj) {
  const parentElement = node.parentElement;
  const parentId = parentElement.dataset.id;
  const parentType = parentId.split("_")[0];
  // only anchors carry a link target
  const href = parentType === "a" ? parentElement.getAttribute("href") : "";

  // Firefox workaround kept from the original implementation (its author did
  // not document why it is needed): toggle display and read the box once.
  parentElement.style.display = "inline-block";
  parentElement.getBoundingClientRect();
  parentElement.style.removeProperty("display");

  // Skip whitespace-only nodes unless they contain a line break. Checked
  // before any Range call so that a zero-length node never reaches
  // range.setEnd(node, 1), which would be an out-of-range offset.
  const nodeText = node.textContent;
  const isNewLine = /[\n\r]/.test(nodeText);
  if (nodeText.trim() === "" && !isNewLine) {
    return false;
  }

  const words = node.nodeValue.split(" ").filter(Boolean);

  // snapshot the parent's computed style for every tracked property
  const computed = window.getComputedStyle(parentElement);
  const nodeStyle = {};
  for (const propName in textNodeObj.styleProps) {
    nodeStyle[propName] =
      propName === "fontSize" ? parseFloat(computed.fontSize) : computed[propName];
  }

  const items = textNodeObj.textNodeData;
  const makeItem = (text, x, y, height, hyphenated = false) => ({
    text,
    x,
    y,
    height,
    style: nodeStyle,
    hyphenated,
    parentId,
    href
  });

  // bbox of the first character: height of a single line
  const range = document.createRange();
  range.setStart(node, 0);
  range.setEnd(node, 1);
  const firstCharBox = range.getBoundingClientRect();
  const lineHeight = firstCharBox.height;

  // bbox of the node without its last character: taller means it wraps
  range.setStart(node, 0);
  range.setEnd(node, node.length - 1);
  const nodeBox = range.getBoundingClientRect();
  const isMultiline = nodeBox.height > lineHeight;

  // single line – no hyphenations
  if (!isMultiline) {
    items.push(makeItem(nodeText, firstCharBox.left, firstCharBox.top, nodeBox.height));
    return;
  }

  // multiline: refine the search on word level
  let start = 0;
  for (let i = 0; i < words.length; i++) {
    const word = words[i];
    const end = start + word.length;

    range.setStart(node, start);
    range.setEnd(node, end);
    const wordBox = range.getBoundingClientRect();
    const hasLinebreak = wordBox.height > lineHeight;

    if (!hasLinebreak) {
      const prev = items[items.length - 1];
      if (i > 0 && wordBox.top === prev.y) {
        // same line as the previous fragment – concatenate
        prev.text += `${word} `;
      } else {
        items.push(makeItem(`${word} `, wordBox.x, wordBox.top, wordBox.height));
      }
    } else {
      // the word wraps: probe collapsed ranges after each character and split
      // wherever the vertical position changes (line break or column change)
      let charOffset = start + 1;
      let lastYTop = wordBox.top;
      const lastLeft = wordBox.left;
      const splitIndices = [0];
      const chars = word.split("");
      // a visible hyphen or en dash is already part of the text, so no
      // artificial hyphen may be flagged
      const hasHyphenChar = /[-–]/.test(word);
      let probeBox = wordBox;

      for (let c = 0; c < chars.length - 1; c++) {
        range.setStart(node, charOffset);
        range.setEnd(node, charOffset);
        probeBox = range.getBoundingClientRect();

        const char = chars[c];
        const isWhitespace = char.trim() !== char;

        if ((probeBox.top > lastYTop || probeBox.top < lastYTop) && !isWhitespace) {
          const prevIndex = splitIndices[splitIndices.length - 1];
          const sub = word.slice(prevIndex, c);

          // NOTE(review): this range always starts at the word start, also
          // for a second split of the same word – confirm whether
          // `start + prevIndex` is intended.
          range.setStart(node, start);
          range.setEnd(node, start + sub.length);
          const subBox = range.getBoundingClientRect();

          items.push(
            makeItem(
              // remove soft hyphens
              sub.replace(/\u00AD/g, ""),
              subBox.left,
              subBox.top,
              subBox.height,
              !hasHyphenChar
            )
          );
          splitIndices.push(c);
        }

        lastYTop = probeBox.top;
        charOffset += 1;
      }

      // remainder of the word after the last split, with the word separator
      items.push(
        makeItem(
          `${word.slice(splitIndices[splitIndices.length - 1])} `,
          lastLeft,
          lastYTop,
          probeBox.height
        )
      );
    }

    // skip the separating space
    start = end + 1;
  }
}
/**
 * Render the collected text fragments as an SVG element.
 *
 * The result is `<svg><g class="gText"><text>…tspans…</text></g></svg>`.
 * Consecutive fragments with the same baseline and style are merged into one
 * `<tspan>`; a change of baseline or style starts a new `<tspan>`, positioned
 * with `x` (new line or column) and a relative `dy`. Fragments flagged as
 * `hyphenated` are followed by an extra `<tspan class="tspanHyphen">`
 * holding "-".
 *
 * @param {object} textNodeObj - collector with `width`, `height`,
 *   `styleProps` (property name → list of values treated as defaults) and
 *   `textNodeData` (fragments with `text`, `x`, `y`, `style`, `hyphenated`,
 *   `lineNum`)
 * @param {number} [decimals=1] - decimals kept for `x` and `dy` attributes
 * @returns {SVGSVGElement} the svg element; it contains only the empty group
 *   when there are no fragments
 */
function renderSVGText(textNodeObj, decimals = 1) {
  const { styleProps, textNodeData } = textNodeObj;
  // round the canvas size up to whole pixels
  const [width, height] = [textNodeObj.width, textNodeObj.height].map((val) =>
    Math.ceil(val)
  );

  // create svg elements
  const ns = "http://www.w3.org/2000/svg";
  const svg = document.createElementNS(ns, "svg");
  svg.setAttribute("viewBox", [0, 0, width, height].join(" "));
  svg.setAttribute("width", width);
  svg.setAttribute("height", height);

  // wrap in group
  const gText = document.createElementNS(ns, "g");
  gText.classList.add("gText");
  svg.append(gText);

  // nothing measured – return the empty canvas instead of failing below
  if (textNodeData.length === 0) {
    return svg;
  }

  const item0 = textNodeData[0];
  let lastTspanY = item0.y;
  let lastStyle = item0.style;

  // create svg text element to emulate HTML paragraph
  let svgText = document.createElementNS(ns, "text");
  svgText.textContent = "";
  svgText.setAttribute(
    "style",
    `font-family:${item0.style.fontFamily}; font-size:${item0.style.fontSize}px; font-weight:${item0.style.fontWeight};`
  );
  svgText.setAttribute("x", item0.x);
  svgText.setAttribute("y", item0.y);
  gText.append(svgText);

  let tspan = document.createElementNS(ns, "tspan");
  svgText.append(tspan);

  const baseStyle = {
    fontFamily: item0.style.fontFamily,
    fontStyle: "normal",
    fontWeight: 400,
    fontSize: item0.style.fontSize
  };
  const baseStyleStr = Object.values(baseStyle).join("");
  const baseTextStyle = `font-family: ${baseStyle.fontFamily}; font-size: ${baseStyle.fontSize}px; font-weight: ${baseStyle.fontWeight};`;

  // NOTE(review): the fragments produced by getTextNodeBboxes() carry
  // `parentId` and `href` but no `parent` property, so this predicate is
  // never true for them and the link branch below does not run. Left
  // unchanged on purpose – enabling it alters the output and needs checking
  // in a browser.
  const isLink = (entry) => entry.parent === "a";

  textNodeData.forEach((item, i) => {
    const prev = i > 0 ? textNodeData[i - 1] : item;
    const next = textNodeData[Math.min(i + 1, textNodeData.length - 1)];
    const styleStr = Object.values(item.style).join("");
    const styleStrPrev = Object.values(lastStyle).join("");
    let tspanNew = document.createElementNS(ns, "tspan");
    // y going back up means the text continues in the next column
    const colBreak = prev.y > item.y;

    const sameStyle = styleStr === styleStrPrev;
    const sameY = item.y === lastTspanY;

    // add links
    if (isLink(item)) {
      // add link and new text el
      const link = document.createElementNS(ns, "a");
      link.setAttribute("href", item.href);
      svgText = document.createElementNS(ns, "text");
      tspanNew = document.createElementNS(ns, "tspan");
      tspanNew.textContent = item.text;
      svgText.setAttribute("style", baseTextStyle);
      tspanNew.setAttribute("x", +item.x.toFixed(decimals));
      tspanNew.setAttribute("y", item.y);
      tspanNew.classList.add("tspan-a");
      // append link
      gText.append(link);
      svgText.append(tspanNew);
      link.append(svgText);
      if (!isLink(next)) {
        // next text el after link
        svgText = document.createElementNS(ns, "text");
        svgText.classList.add("p-a");
        svgText.setAttribute("style", baseTextStyle);
        svgText.setAttribute("x", item.x);
        svgText.setAttribute("y", item.y);
        gText.append(svgText);
      }
      tspan = tspanNew;
    }
    // new line or new style – start a new tspan.
    // NOTE(review): the original additionally tested `!item.hypenated`
    // (misspelled, hence always true); the effective condition is kept.
    else if ((i > 0 && !sameY) || !sameStyle) {
      tspanNew.textContent = item.text;
      const dy = +(item.y - prev.y).toFixed(decimals);
      // omit x/dy values if on same line and not after column shift
      if (prev.lineNum !== item.lineNum || isLink(prev) || colBreak) {
        tspanNew.setAttribute("x", +item.x.toFixed(decimals));
      }
      if (dy) {
        tspanNew.setAttribute("dy", dy);
      }
      svgText.append(tspanNew);
      tspan = tspanNew;
    }
    // same line/style – append content to previous tspan
    else {
      tspan.textContent += item.text;
    }

    // append hyphen tspan
    let tspanHyphen;
    if (item.hyphenated) {
      tspanHyphen = document.createElementNS(ns, "tspan");
      tspanHyphen.classList.add("tspanHyphen");
      tspanHyphen.setAttribute("aria-hidden", "true");
      tspanHyphen.style.userSelect = "none";
      tspanHyphen.textContent = "-";
      svgText.append(tspanHyphen);
    }

    // apply styles if different from base style or previous
    if (baseStyleStr !== styleStr || styleStrPrev !== styleStr) {
      for (const propName in styleProps) {
        const rawValue = item.style[propName];
        const propValue = propName === "fontSize" ? parseFloat(rawValue) : rawValue;
        const propDefaults = styleProps[propName];
        const unit = propName === "fontSize" ? "px" : "";
        // svg text is coloured via fill
        const svgProp = propName === "color" ? "fill" : propName;

        // set styles - ignore defaults; properties without any listed
        // default (empty list) are never written
        if (
          propDefaults.length &&
          !propDefaults.includes(propValue) &&
          !propValue.toString().includes("none") &&
          propValue !== baseStyle[svgProp]
        ) {
          tspan.style[svgProp] = `${propValue}${unit}`;
          if (item.hyphenated) {
            tspanHyphen.style[svgProp] = `${propValue}${unit}`;
          }
        }
      }
    }

    // update y
    lastTspanY = item.y;
    lastStyle = item.style;
  });

  return svg;
}
// text helpers

/**
 * Collect the text nodes below an element in document order.
 *
 * @param {Node} el - root of the subtree to walk
 * @returns {Text[]} the text nodes visited by a SHOW_TEXT tree walker
 */
function getTextNodesInEL(el) {
  const textWalker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null);
  const collected = [];
  // nextNode() advances the walker and yields null once the subtree is done
  for (let found = textWalker.nextNode(); found; found = textWalker.nextNode()) {
    collected.push(textWalker.currentNode);
  }
  return collected;
}
/* page-wide UI font */
body {
  font-family: 'Fira Sans', 'Open Sans', 'Segoe UI', sans-serif;
}

/* rendered output: let text overflow the canvas and outline it */
svg {
  overflow: visible;
  border: 1px solid #ccc;
}

/* user-resizable box (both axes) */
.resize {
  resize: both;
  border: 1px solid #ccc;
  overflow: auto;
}

/* typography of the HTML text that gets converted */
.foreignBody {
  font-family: "Noto Serif", serif;
  font-weight: 400;
  font-style: normal;
  font-size: 1em;
  line-height: 1.7em;
}

/* headings: condensed, uppercase */
h1 {
  font-size: 2em;
  line-height: 1.2em;
  margin: 0 0 1rem;
  font-stretch: 50%;
  text-transform: uppercase;
}

/* author line sits directly above the title */
.author {
  line-height: 1.2em;
  font-style: italic;
  margin-bottom: 0em;
}

/* paragraphs: automatic hyphenation */
p {
  margin: 0 0 1rem;
  hyphens: auto;
  -webkit-hyphens: auto;
}

/* two-column text layout */
.columns {
  column-count: 2;
  column-gap: 1em;
}

/* markup output area */
textarea {
  display: block;
  width: 100%;
  min-height: 15em;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment