Skip to content

Instantly share code, notes, and snippets.

@unphased
Created April 14, 2024 10:06
Show Gist options
  • Save unphased/628a116cdde6f2d3f49bbe04a8e3d555 to your computer and use it in GitHub Desktop.
Save unphased/628a116cdde6f2d3f49bbe04a8e3d555 to your computer and use it in GitHub Desktop.
ANSI escape sequence to HTML, for illustrative purposes
/**
 * Escapes the three characters that are significant in HTML text content.
 *
 * `&` is replaced first so that the ampersands introduced by the `<` / `>` replacements are
 * not escaped a second time. Quote characters are left untouched, so the result is intended
 * for element content rather than attribute values.
 *
 * @param str - raw text to embed in HTML.
 * @returns `str` with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
export const htmlEscape = (str: string): string => {
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
};
/**
 * Translates the parameter string of a single SGR escape sequence into the HTML tag that
 * stands in for it.
 *
 * Combined sequences (several codes joined by `;`, e.g. `"1;31"`) are not split; only the
 * exact forms listed below are recognised.
 *
 * @param code - the text between `ESC[` and `m`, e.g. `"1"`, `"38;5;208"` or `""`.
 * @returns `'</span>'` for any reset code, an opening `<span …>` for a recognised style or
 *   color, or `''` when the code is not recognised (the sequence is then dropped from the
 *   HTML output).
 */
const sgrCodeToTag = (code: string): string => {
  switch (code) {
    case '0': // reset all
    case '':
    case '39': // reset foreground
    case '49': // reset background
    case '22': // reset bold/dim
    case '23': // reset italic
    case '24': // reset underline
    case '27': // reset inverse
    case '29': // reset strikethrough
    case '59': // reset underline color
      return '</span>';
    case '1':
      return '<span class="ansi-bold">';
    case '2':
      return '<span class="ansi-dim">';
    case '3':
      return '<span class="ansi-italic">';
    case '4':
      return '<span class="ansi-underline">';
    case '7':
      return '<span class="ansi-inverse">';
    case '9':
      return '<span class="ansi-strikethrough">';
    default:
      break;
  }

  // 256-color codes map to CSS classes; 24-bit colors have no class and use an inline style.
  let m: RegExpMatchArray | null;
  if ((m = code.match(/^38;5;(\d+)$/))) {
    return `<span class='ansi-fg-256-${m[1]}'>`;
  }
  if ((m = code.match(/^48;5;(\d+)$/))) {
    return `<span class='ansi-bg-256-${m[1]}'>`;
  }
  if ((m = code.match(/^38;2;(\d+);(\d+);(\d+)$/))) {
    return `<span class='ansi-tc-fg' style='color:rgb(${m[1]},${m[2]},${m[3]});'>`;
  }
  if ((m = code.match(/^48;2;(\d+);(\d+);(\d+)$/))) {
    return `<span class='ansi-tc-bg' style='background-color:rgb(${m[1]},${m[2]},${m[3]});'>`;
  }
  if ((m = code.match(/^4:(\d)$/))) {
    return `<span class='ansi-ul-style-${m[1]}'>`;
  }
  if ((m = code.match(/^58[:;]5[:;](\d+)$/))) {
    return `<span class='ansi-ul-256-${m[1]}'>`;
  }
  // The components accept the same `:` / `;` separators as the `58` and `2` prefix.
  if ((m = code.match(/^58[:;]2[:;](\d+)[:;](\d+)[:;](\d+)$/))) {
    return `<span class='ansi-ul-tc' style='text-decoration-color:rgb(${m[1]},${m[2]},${m[3]});'>`;
  }
  if (/^\d+$/.test(code)) {
    // plain 8/16-color codes
    const color = Number.parseInt(code, 10);
    const isForeground = (color >= 30 && color <= 37) || (color >= 90 && color <= 97);
    const isBackground = (color >= 40 && color <= 47) || (color >= 100 && color <= 107);
    if (isForeground) {
      return `<span class="ansi-fg-${color}">`;
    }
    if (isBackground) {
      return `<span class="ansi-bg-${color}">`;
    }
  }
  return '';
};

/**
 * Converts text containing ANSI SGR escape sequences (`ESC[…m`) into HTML, line by line.
 *
 * Each recognised style/color sequence becomes an opening `<span>` and each reset sequence
 * becomes a `</span>`; no attempt is made to track which style a reset belongs to. Spans are
 * balanced per line: missing closers are appended and missing openers are prepended as bare
 * `<span>` elements, so nesting never leaks from one line into the next. Text between
 * sequences is HTML-escaped with `htmlEscape`.
 *
 * Known limitations: combined sequences such as `ESC[1;31m` are not split on `;`, and
 * changing one attribute part-way through another (e.g. a background change inside bold) is
 * not modelled.
 *
 * @param ansi - input text; lines are separated by `\n`.
 * @returns parallel per-line arrays:
 *   - `html`: the converted HTML for the line,
 *   - `cleaned`: the line with every matched escape sequence removed (not HTML-escaped),
 *   - `idxs`: start offsets of each escape sequence within the original line,
 *   - `lens`: lengths of those escape sequences.
 */
export const convertAnsiHtml = (
  ansi: string,
): { html: string[]; cleaned: string[]; idxs: number[][]; lens: number[][] } => {
  const html_a: string[] = [];
  const cleaned_a: string[] = [];
  const index_a: number[][] = []; // start locations of escape sequences
  const len_a: number[][] = []; // lengths of escape sequences
  const escapeRE = /\x1b\[([0-9;:]*)m/g;

  for (const line of ansi.split('\n')) {
    // The regex is stateful because of the `g` flag; start every line from the beginning.
    escapeRE.lastIndex = 0;
    let html = '';
    let cleaned = '';
    let idx = 0; // offset in `line` just past the previous escape sequence
    const indexs: number[] = [];
    const lens: number[] = [];
    let nestCount = 0; // opened spans minus closed spans on this line

    let match: RegExpExecArray | null;
    while ((match = escapeRE.exec(line)) !== null) {
      const start = match.index;
      const len = match[0].length;
      const sourceSegment = line.slice(idx, start);
      html += htmlEscape(sourceSegment);
      cleaned += sourceSegment;
      indexs.push(start);
      lens.push(len);

      const tag = sgrCodeToTag(match[1] ?? '');
      if (tag === '</span>') {
        nestCount--;
      } else if (tag.startsWith('<span')) {
        nestCount++;
      }
      html += tag;
      idx = start + len;
    }

    // Trailing text after the last escape sequence, or the whole line when there is none.
    const finalSourceSegment = line.slice(idx);
    html += htmlEscape(finalSourceSegment);
    cleaned += finalSourceSegment;
    index_a.push(indexs);
    len_a.push(lens);

    // Equalize nesting on a per-line basis to stop runaway nesting.
    if (nestCount < 0) {
      html = '<span>'.repeat(-nestCount) + html;
    } else if (nestCount > 0) {
      html += '</span>'.repeat(nestCount);
    }

    html_a.push(html);
    cleaned_a.push(cleaned);
  }

  return { html: html_a, cleaned: cleaned_a, idxs: index_a, lens: len_a };
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment