Skip to content

Instantly share code, notes, and snippets.

@sr
Created May 21, 2009 14:14
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sr/115485 to your computer and use it in GitHub Desktop.
Save sr/115485 to your computer and use it in GitHub Desktop.
Markdown in awk(1)
#!/bin/awk -f
# md2html.awk
# by: Jesus Galan (yiyus) <yiyu.jgl@gmail>, May 2009
# Usage:
# md2html file.md > file.html
# Options: -v esc=false to not escape html
# newblock(nblock): flush the block being accumulated and start another.
# If any text is pending it is printed wrapped in the current block's tag.
# Afterwards the pending text is empty and the current block becomes
# nblock, or "p" when nblock is omitted or empty.
function newblock(nblock){
	if(text)
		print "<" block ">" text "</" block ">";
	block = "p";
	if(nblock)
		block = nblock;
	text = "";
}
# subinline(tgl, inl): turn every occurrence of the toggle regex tgl in $0
# into an opening or closing <inl> tag.
# The stack inline[1..ni] remembers which inline tags are open: when the
# innermost open tag is inl the toggle closes it, otherwise it opens a new one.
function subinline(tgl, inl){
	while(match($0, tgl) > 0){
		if(inline[ni] != inl){
			# Not currently inside <inl>: open it and push it.
			if(sub(tgl, "<" inl ">"))
				inline[++ni] = inl;
		} else {
			# Innermost open tag is <inl>: close it and pop it.
			ni -= sub(tgl, "</" inl ">");
		}
	}
}
# dolink(href, lnk): build the markup for an anchor pointing at href with
# lnk as its content.
# The HTML escaping applied to the input line is undone inside the uri.
# Every "&" left in either argument is then prefixed with a backslash,
# because the callers pass the result to sub() as a replacement string,
# where a bare "&" would stand for the matched text.
function dolink(href, lnk){
	gsub(/&/, "\\\\\\&", lnk);
	# Undo escaped html in the uri ("&amp;" must go first).
	gsub(/&amp;/, "\\&", href);
	gsub(/&lt;/, "<", href);
	gsub(/&gt;/, ">", href);
	# & can be tricky, and not standard:
	gsub(/&/, "\\\\\\&", href);
	return sprintf("<a href=\"%s\">%s</a>", href, lnk);
}
# Initial converter state.
BEGIN {
	# Block being accumulated and its pending text.
	block = "p";
	text = "";
	# Stack depths: nested lists and open inline tags.
	nl = 0;
	ni = 0;
}
# Escape html on every input line, unless run with -v esc=false.
# "&" is handled first so the entities produced for "<" and ">" are not
# escaped a second time.
!(esc == "false") {
	gsub("&", "\\&amp;", $0);
	gsub("<", "\\&lt;", $0);
	gsub(">", "\\&gt;", $0);
}
# Horizontal rules
# A line made only of "-", "*" or "_" markers (each optionally followed by a
# blank) becomes <hr>, but only when no text is pending; with pending text
# such a line is left for the rules further down.
text == "" && /^[ ]*([-*_] ?)+[ ]*$/ {
	print "<hr>";
	next;
}
# Tables (not in markdown)
# Any line with a "|" next to a blank is a table row; cells are split on "|".
# Alignment comes from the blanks around the cell text:
#   blanks before only      -> right
#   blanks before and after -> center
#   anything else           -> left
# $0 is rewritten to the <tr> markup and then flows on to the later rules,
# which append it to the pending text of the table block.
/([ ]\|)|(\|[ ])/ {
	if(block != "table")
		newblock("table");
	nc = split($0, cells, "|");
	$0 = "<tr>\n";
	i = 1;
	while(i <= nc){
		if(!sub(/^[ ]+/, "", cells[i])){
			align = "left";
			sub(/[ ]+$/, "", cells[i]);
		} else if(sub(/[ ]+$/, "", cells[i]))
			align = "center";
		else
			align = "right";
		$0 = $0 "<td align=\"" align "\">" cells[i] "</td>\n";
		i++;
	}
	$0 = $0 "</tr>";
}
# Ordered and unordered (possibly nested) lists
# A list item starts with optional blanks, a bullet (*, + or -) or a number
# (optionally followed by "." or "-"), and a blank.
# The nesting level is one plus the number of leading blanks; list[1..nl]
# holds the tags of the lists currently open.
/^[ ]*([*+-]|(([0-9]+[\.-]?)+))[ ]/ {
	newblock("li");
	# Strip the leading blanks one at a time, counting the level.
	for(nnl = 1; match($0, /^[ ]/); nnl++)
		sub(/^[ ]/, "");
	# Close lists deeper than this item.
	for(; nl > nnl; nl--)
		print "</" list[nl] ">";
	# Open lists until this item's level is reached.
	while(nl < nnl){
		nl++;
		list[nl] = match($0, /^[*+-]/) ? "ul" : "ol";
		print "<" list[nl] ">";
	}
	# Drop the item marker; what remains is the item text.
	sub(/^([*+-]|(([0-9]+[\.-]?)+))[ ]/, "");
}
# Multi line list items
# While a list item is open, leading blanks and tabs are dropped from every
# line, so indented continuation lines are not claimed by the code-block
# rule that follows.
block == "li" {
	sub(/^[ \t]+/, "");
}
# Code blocks
# A code line is indented by four spaces or one tab (the Markdown rule).
# Matching a single leading space here would turn any slightly indented
# paragraph line into code.
# One level of indentation is removed and the line is appended verbatim,
# with its newline, to the pending text; "next" skips inline processing.
/^(    |\t)/ {
	if(block != "code")
		newblock("code");
	sub(/^(    |\t)/, "");
	text = text $0 "\n";
	next;
}
# Paragraph
# An empty line ends the block being accumulated and closes every open list.
/^$/ {
	newblock();
	for(; nl > 0; nl--)
		print "</" list[nl] ">";
}
# Setext-style headers
# (Plus h3 with underscores.)
# An underline made only of "=", "-" or "_" turns the text accumulated so far
# into an h1, h2 or h3; the header is printed at the next block change.
# The rules apply only when there is pending text: an underline with nothing
# above it would otherwise set the block type and silently turn the
# *following* paragraph into a header. Without pending text the line is left
# to the remaining rules.
text != "" && /^=+$/ {
	block = "h1";
	next;
}
text != "" && /^-+$/ {
	block = "h2";
	next;
}
text != "" && /^_+$/ {
	block = "h3";
	next;
}
# Atx-style headers
# "# Title", "## Title ##", ...: the number of leading "#" gives the header
# level, capped at 6.
/^#/ {
	# Flush whatever was being accumulated before the header.
	newblock();
	match($0, /^#+/);
	n = RLENGTH;
	text = substr($0, n + 1);
	if(n > 6)
		n = 6;
	# Drop optional closing hashes. They must be set off by whitespace so
	# that a title ending in e.g. "C#" keeps its "#".
	sub(/[ \t]+#+[ \t]*$/, "", text);
	# Trim the blanks around the title.
	sub(/^[ \t]+/, "", text);
	sub(/[ \t]+$/, "", text);
	block = "h" n;
	# An atx header is complete on its own line: print it now so the next
	# line starts a fresh paragraph. An empty title prints nothing and
	# leaves no header block pending.
	newblock();
	next;
}
# Inline markup, applied to every line that reaches this point.
# Images, explicit links and bare uris are rewritten in $0, then the
# emphasis toggles; the result is appended to the pending block text.
// {
	# Images: ![alt](src)
	while(match($0, /!\[[^\]]+\]\([^\)]+\)/)){
		split(substr($0, RSTART, RLENGTH), a, /(!\[)|\)|(\]\()/);
		# In a sub() replacement a bare "&" stands for the matched text.
		# Left unescaped, an "&" in the alt text or uri (e.g. from "&amp;")
		# re-inserts the image syntax and this loop never ends.
		gsub(/&/, "\\\\\\&", a[2]);
		gsub(/&/, "\\\\\\&", a[3]);
		sub(/!\[[^\]]+\]\([^\)]+\)/, "<img src=\"" a[3] "\" alt=\"" a[2] "\">");
	}
	# Links: [text](uri)
	while(match($0, /\[[^\]]+\]\([^\)]+\)/)){
		split(substr($0, RSTART, RLENGTH), a, /[\[\)]|(\]\()/);
		sub(/\[[^\]]+\]\([^\)]+\)/, dolink(a[3], a[2]));
	}
	# Auto links (uri matching is poor)
	na = split($0, a, /(^\()|[ ]|([,\.\)]([ ]|$))/);
	for(i = 1; i <= na; i++)
		if(match(a[i], /^(((https?|ftp|file|news|irc):\/\/)|(mailto:)).+$/)){
			# The uri is literal text, not a pattern: quote the regex
			# metacharacters so uris containing "?", "+", "(" ... still
			# match themselves.
			uri = a[i];
			gsub(/[\\\[\]().^$*+?{}|]/, "\\\\&", uri);
			sub(uri, dolink(a[i], a[i]));
		}
	# Inline
	subinline("(\\*\\*)|(__)", "strong");
	subinline("\\*", "em");
	subinline("`", "code");
	# Lines of the same block are joined with a single blank.
	text = text (text ? " " : "") $0;
}
# End of input: close inline tags still open, flush the last block and
# close every list still open.
END {
	for(; ni > 0; ni--)
		text = text "</" inline[ni] ">";
	newblock();
	for(; nl > 0; nl--)
		print "</" list[nl] ">";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment