Last active
December 25, 2015 13:59
-
-
Save c00kiemon5ter/6988276 to your computer and use it in GitHub Desktop.
convert CDATA math blocks, to figure@asset,math/p tags
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/awk -f
#
# script and figure tags are expected to start and end on the same line
#
#   [good]  <figure class="foo" id="bar">
#   [bad]   <script type=
#               "foo">
# Track the enclosing <figure> element in two variables:
#   fig     1 while a figure is open, 0 otherwise
#   fignum  value of the figure's data-fignum attribute, "" if none
#
# Because the close-tag rule runs first, a line that carries both a
# close-tag and an open-tag ends with fig set.

# a figure close-tag ends the current figure: forget its number
fig && index($0, "</figure>") {
    fig = 0
    fignum = ""
}

# a figure open-tag starts a figure; the number is picked up only when
# the first quoted value on the line is "formula" and it is directly
# followed by a data-fignum attribute
index($0, "<figure") {
    fig = 1
    # splitting on double quotes puts the quoted values at even indices
    n = split($0, toks, "\"")
    if (n >= 5 && toks[2] == "formula" && toks[3] == " data-fignum=")
        fignum = toks[4]
}
# A <script> open-tag whose first quoted value on the line starts with
# "math/tex;" marks a math block.  The line is parked in the hold buffer
# and not printed yet, because the next line may or may not be a CDATA
# block; `next` skips every later rule for this line, including the
# print.
index($0, "<script") {
    # splitting on double quotes puts the first quoted value in toks[2]
    n = split($0, toks, "\"")
    if (n >= 3 && index(toks[2], "math/tex;") == 1) {
        scr = 1
        hold = $0
        next
    }
}
# Inside a math script, replace the CDATA opening marker with the
# opening tags of the element that takes its place.
#
# The pattern is a regex literal on purpose: written as a string, "\["
# goes through string escape processing before the regex is compiled,
# and the result differs between awk implementations.
#
# The replacement is spliced in with substr() rather than sub() so that
# an "&" or "\" inside fignum is copied literally instead of being
# interpreted as a replacement metacharacter.
scr && match($0, /<[ \t]*!\[CDATA\[/) {
    cdt = 1
    startstr = "<figure class=\"asset math\" data-num=\"" fignum "\" id=\"" fignum "\"><p> ~~~ "
    $0 = substr($0, 1, RSTART - 1) startstr substr($0, RSTART + RLENGTH)
}
# Flush the hold buffer, which holds a math script open-tag line parked
# by an earlier rule.  If a CDATA block is open (cdt set) the script tag
# is stripped from the held line.  Otherwise the held line is emitted
# untouched and scr is cleared, so the matching script close-tag is not
# removed later.
hold {
    if (cdt)
        sub(/<script[^>]*>/, "", hold)
    else
        scr = 0
    print hold
    hold = ""
}
# Inside a CDATA block, replace the CDATA closing marker with the close
# tags of the elements that were added in place of the opening marker,
# and reset the figure number so it is not reused.
#
# The pattern is a regex literal on purpose: written as a string, "\]"
# goes through string escape processing before the regex is compiled,
# which is not portable across awk implementations.
cdt && /\]\][ \t]*>/ {
    cdt = 0
    endstr = " ~~~ </p></figure>"
    sub(/\]\][ \t]*>/, endstr)
    fignum = ""
}
# Inside a math script (scr set), remove the script close-tag from the
# line and leave script mode.  scr has already been cleared for scripts
# that were not followed by a CDATA block, so their close-tag is kept.
scr && index($0, "</script>") {
    scr = 0
    sub(/<\/script>/, "")
}
# Finally print the current line, including any edits made by the rules
# above.  Lines parked in the hold buffer never get here because the
# rule that parks them ends with `next`.
{
    print
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment