<!-- HTML Table to Markdown Extra converter -->
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<!-- Without a viewport declaration mobile browsers lay the page out at desktop
     width, so the min-width: 40rem breakpoint in the stylesheet never falls
     back to the single-column layout. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>HTML Table to Markdown Extra Table</title>
<style>
  /* Size every element by its border box so the textareas' width: 100%
     includes their padding and border. */
  * { -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box; }

  body {
    font-family: -apple-system, "Segoe UI", Arial, Helvetica, sans-serif;
    line-height: 1.5;
    text-rendering: optimizeLegibility;
    -webkit-font-smoothing: antialiased;
    -moz-osx-font-smoothing: grayscale;
  }

  textarea { width: 100%; height: 15em; }
  button { font-size: inherit; }
  p, li { max-width: 75ch; } /* WCAG 2.0, guideline 1.4.8 */

  /* Input/output panes: stacked by default, side by side on wide viewports.
     grid-gap is the legacy name of gap; both are declared for older browsers. */
  .two-by { display: grid; grid-gap: 1rem; gap: 1rem; }
  .two-by h2, .two-by p { margin: 0; }

  code {
    color: #303;
    font-weight: bold;
    font-family: Consolas, Menlo, "Courier New", Courier, monospace;
    border-radius: 2px;
    border: 1px solid rgba(0, 0, 0, 0.2);
    padding: 0 0.2em;
    background-color: #eee;
  }

  @media (min-width: 40rem) {
    .two-by { grid-template-columns: 1fr 1fr; }
  }
</style>
<h1>HTML Table to Markdown Extra Table</h1>
<p><em>Paste HTML table code into the Input, click the Convert button, and a
<a href="https://michelf.ca/projects/php-markdown/extra/#table">Markdown Extra table</a>
will be placed in the Output. This is meant as a first pass at table conversion and will
not work for all types of tables.</em></p>
<div class="two-by">
  <form method="post" id="form">
    <!-- The heading doubles as the visible label: a placeholder alone does not
         give the control an accessible name. -->
    <h2><label for="in">Input</label></h2>
    <!-- Sample table pre-filled as the textarea's default value. Its lines start
         at column 0 on purpose; any indentation would become part of the value. -->
    <p><textarea name="in" id="in" autofocus placeholder="paste HTML table code here">&lt;table&gt;&lt;thead&gt;
&lt;tr&gt;
&lt;th&gt;Header 1&lt;/th&gt;
&lt;th&gt;Header 2&lt;/th&gt;
&lt;/tr&gt;
&lt;/thead&gt;&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;&lt;a href=&quot;https://example.com/&quot;&gt;Cell 1, 1&lt;/a&gt;&lt;/td&gt;
&lt;td&gt;Cell &lt;em&gt;1, 2&lt;/em&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;Cell &lt;code&gt;2, 1&lt;/code&gt;&lt;/td&gt;
&lt;td&gt;Cell &lt;strong&gt;2, 2&lt;/strong&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;&lt;img src=&quot;cat.png&quot; alt=&quot;cat pic&quot;&gt;&lt;/td&gt;
&lt;td&gt;&lt;img alt='another cat pic' src=&quot;kitty.gif&quot; width=&quot;300&quot;&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;&lt;/table&gt;</textarea>
    <button type="submit" id="submit">Convert</button></p>
  </form>
  <div>
    <h2><label for="out">Output</label></h2>
    <p><textarea id="out" placeholder="markdown extra converted table will appear here"></textarea></p>
  </div>
</div>
<ul>
  <li>Malformed HTML such as missing closing tags will likely produce undesirable results.
  Tag attribute values, such as the URL for <code>href</code>, are assumed to be quoted
  with <code>&quot;</code> or <code>'</code>. For this tool, it only affects <code>href</code>,
  <code>src</code>, and <code>alt</code> attributes.</li>
  <li>No CSS styling is preserved.</li>
  <li>Cell widths in the Markdown are not equalized.</li>
  <li>The first row in the table is assumed to be the header row, regardless
  of <code>thead</code> and <code>th</code> tags.</li>
  <li>Cells containing <code>ul</code>/<code>ol</code> or <code>p</code> elements are
  compressed to single-line cells. It's likely that such a complex table would be
  better served with different formatting (headings, paragraphs) anyway for
  accessibility/readability reasons.</li>
  <li>Tables with <code>colspan</code> and <code>rowspan</code> are likely to
  produce undesirable results and are beyond the scope of Markdown Extra anyway.</li>
  <li><code>img</code> tags are converted to Markdown only if they have a
  <code>src</code> and an <code>alt</code> attribute. Why?
  <a href="https://www.w3.org/TR/WCAG20/#text-equiv">WCAG 2.0, guideline 1.1</a>.
  All other attributes are ignored, including <code>title</code>.</li>
  <li><code>a</code>, <code>br</code>, <code>strong</code>, <code>em</code>,
  and <code>code</code> tags are converted to Markdown. <strong>All other
  tags are discarded.</strong></li>
</ul>
<script>
((window, document) => {
  /**
   * Build a string made of `count` copies of `pattern`.
   * Counts below 1 yield ''. The guard is required because the native
   * String.prototype.repeat throws a RangeError for negative counts.
   */
  const repeat = (pattern, count) => (count < 1 ? '' : pattern.repeat(count));

  /*
   * Ordered [pattern, replacement] rewrite rules that turn HTML table markup
   * into pipe-delimited Markdown rows. The order is significant: inline tags
   * are converted before the catch-all tag strip, and tabs are normalised
   * first so that "\t" is free to act as the temporary BR placeholder.
   */
  const RULES = [
    [/\t/g, ' '],                  // convert tabs to a single space
    [/\s*[\r\n]\s*/g, ''],         // join all lines into one
    // remove html comments; the backslash keeps the literal comment-open
    // sequence out of the script element's source text
    [/<\!--[\s\S]*?-->/g, ''],
    [/ *<a[^>]* href=(["'])(.*?)\1[^>]*> *(.*?)<\/a>/ig, '[$3]($2)'], // anchors -> [text](url)
    // \b stops these from also matching longer tag names such as "embed";
    // the i flag matches upper-case tags like the other rules do
    [/<\/?strong\b[^>]*>/ig, '**'], // strong -> **
    [/<\/?em\b[^>]*>/ig, '_'],      // em -> _
    [/<\/?code\b[^>]*>/ig, '`'],    // code -> `
    [/<img[^>]* src=(["'])(.*?)\1[^>]* alt=(["'])(.*?)\3[^>]*>/ig, '![$4]($2)'], // images with src, alt
    [/<img[^>]* alt=(["'])(.*?)\1[^>]* src=(["'])(.*?)\3[^>]*>/ig, '![$2]($4)'], // images with alt, src
    [/ *<tr\b[^>]*>/ig, '\n|'],     // each row starts a new line with a leading pipe
    [/\s*<t[dh]\b[^>]*>/ig, ' '],   // opening td/th -> a space (\b leaves thead to the tag strip)
    [/\s*<\/t[dh]>/ig, ' |'],       // closing td/th -> " |"
    [/&nbsp;/ig, ''],              // drop non-breaking spaces
    [/&amp;/ig, '&'],              // de-entize ampersands
    [/<br[^>]*>/ig, '\t'],         // temporarily park BR tags as tabs
    [/<\/?[^>]+>/ig, ''],          // drop all other tags
    [/\t *\|/g, ' |'],             // drop cell-ending BRs
    [/\s*\t\s*/g, '<br />'],       // restore the remaining BR tags
    [/\| {2,}/g, '| '],            // tighten spacing after the pipe symbols
    [/ {2,}\|/g, ' |'],            // tighten spacing before the pipe symbols
    [/^ +\|/gm, '|'],              // trim line-leading whitespace
    // NOTE(review): the earlier comment said "three spaces" but the
    // replacement is a single space; confirm which was intended.
    [/ {4,}/g, ' '],               // collapse runs of 4+ spaces
    [/^\s+|\s+$/g, ''],            // trim the whole result
  ];

  /**
   * Convert a string of HTML table markup into a Markdown Extra table.
   *
   * The first generated row is treated as the header row, and a separator
   * row whose dash runs match each header cell's width is inserted after it.
   * If no row was produced, the rewritten text is returned without a separator.
   *
   * @param {string} html raw HTML pasted by the user
   * @returns {string} the Markdown table text
   */
  const toMarkdownTable = html => {
    const table = RULES.reduce(
      (text, [pattern, replacement]) => text.replace(pattern, replacement),
      html
    );

    const lines = table.split('\n');
    const headerAt = lines.findIndex(line => line.startsWith('|'));
    if (headerAt < 0) return table;

    // "| a | b |".split('|') yields empty first and last entries; drop them.
    const cells = lines[headerAt].split('|').slice(1, -1);
    const separator = '|' + cells.map(cell => repeat('-', cell.length) + '|').join('');

    lines.splice(headerAt + 1, 0, separator);
    return lines.join('\n');
  };

  // cache the inputs
  const i = document.getElementById('in');
  const o = document.getElementById('out');

  // bind a submit handler to the form
  document.getElementById('form').addEventListener('submit', ev => {
    // stop the normal form submit process because everything happens here;
    // otherwise the browser would POST the form and reload the page
    ev.preventDefault();

    const html = i.value;
    // only proceed if there is text to work with
    if (!html.length) return;

    // put the new version into the output box
    o.value = toMarkdownTable(html);
    // clear the old version for quick pasting of new code
    i.value = '';
    // move focus to the output box and select its text, ready to copy
    o.focus();
    o.select();
  });
})(window, document);
</script>
<!--
LICENSE: CC0, Public Domain

2021-02-11, SW:
- stop paying the jQuery tax
- upgrade to ES6 syntax
- swap the CSS to use Grid instead of Flexbox and renamed to .two-by
  because… reasons
- allow tick (') attribute value delimiters in addition to the default
  double quote delimiter; only for href, src, alt
- add some img tags to the sample table
- make other mysterious but minor tweaks to html, notes, and CSS
- add this change log
- add license
-->
