Skip to content

Instantly share code, notes, and snippets.

@apg
Created March 20, 2017 22:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save apg/69bef672e1e1e85ca90c5e43706fbdbf to your computer and use it in GitHub Desktop.
Save apg/69bef672e1e1e85ca90c5e43706fbdbf to your computer and use it in GitHub Desktop.
### ====================================================================
### @Awk-file{
### author = "Nelson H. F. Beebe",
### version = "1.06",
### date = "24 October 1997",
### time = "21:34:34 MDT",
### filename = "man2html.awk",
### address = "Center for Scientific Computing
### University of Utah
### Department of Mathematics, 105 JWB
### 155 S 1400 E RM 233
### Salt Lake City, UT 84112-0090
### USA",
### telephone = "+1 801 581 5254",
### FAX = "+1 801 581 4148",
### URL = "http://www.math.utah.edu/~beebe",
### checksum = "01400 968 2975 23193",
### email = "beebe@math.utah.edu (Internet)",
### codetable = "ISO/ASCII",
### keywords = "nroff, troff, UNIX manual page",
### supported = "yes",
### docstring = "This program converts UNIX manual pages
### in nroff/troff markup to strictly-conformant
### HTML 2.0, 3.0, or 3.2. [Actually, only two
### HTML 3.x entities (`&nbsp;' and `&shy;')
### are used, and those rarely; otherwise, the
### syntax conforms strictly to HTML 2.0.]
###
### Usage:
### nawk -f man2html.awk [HTML=2|3|3.2] \
### manpage-file >html-file
###
### The single option, HTML=2, HTML=3, or
### HTML=3.2, selects the HTML grammar level.
### The default is HTML=2.
###
### This program is normally run via a shell
### wrapper that offers an option for setting the
### output file name. It has been used to
### successfully convert entire man-page
### collections on several UNIX systems to HTML
### form for convenient World-Wide Web browser
### access.
###
### Of those nroff/troff commands defined in the
### -man format used for UNIX manual pages, only
### the most commonly-used ones are supported;
### unrecognized ones will be warned about, and
### preserved as HTML comments in the output.
###
### UNIX man pages tend to be written in a
### highly-stylized fashion that we apply
### heuristics to in order to recover high-level
### HTML structure from low-level nroff/troff
### markup. Deviations from conventional
### man-page writing practice will likely result
### in less-than-perfect translation to HTML.
###
### Although there are several other `man2html'
### translators available on the Internet, this
### one is entirely of my own authorship, with no
### code borrowing from anywhere else.
###
### The checksum field above contains a CRC-16
### checksum as the first value, followed by the
### equivalent of the standard UNIX wc (word
### count) utility output of lines, words, and
### characters. This is produced by Robert
### Solovay's checksum utility.",
### }
### ====================================================================
# ----------------------------------------------------------------------
# Main pattern/action rules.  Rules are tried in order and nearly all of
# them end with "next", so the first matching rule wins; specific
# patterns must therefore precede the more general ones.
#
# Initialization is deferred to the first input record instead of being
# done in BEGIN: operand assignments such as HTML=3 (the documented
# usage) are only processed after BEGIN has run, and FILENAME is not yet
# set in BEGIN.  The END rule covers the empty-input case.
# ----------------------------------------------------------------------
!Initialized { Initialized = 1; initialize() }
/^[.]ie +t +[.]ds/ { getline } # fall through: next line should be .el
/^[.]el +[.]ds/ { define($3); next }
/^[.']\\"/ { cmd_comment($0); next } # save comments
/^[.]if +n *\\\{/ { cmd_comment($0); next }
/^[.]if +t *\\\{/ { cmd_comment_block($0); next } # convert troff directives to comments
/^ *\\\}/ { cmd_comment($0); next }
/^[.]if +t/ { cmd_comment($0); next } # convert troff directives to comments
/^[.]if +n +[.]ds/ { define($4); next }
/^[.]if +n +[.]ti/ { cmd_comment($0); next } # convert nroff spacing directives to comments
/^[.]ie +n +[.]ds/ { define($4); next }
/^[.]SH/ { cmd_SH(); next }
/^[.]SS/ { cmd_SS(); next }
/^[.]TH/ { cmd_TH(); next }
/^[.]B / { cmd_B(); next }
/^[.]I / { cmd_I(); next }
/^[.]IX / { cmd_IX(); next }
/^[.]R / { cmd_R(); next }
/^[.]ad/ { cmd_ad(); next }
/^[.][BIR]$/ { cmd_BIR(); next }
/^[.]BI / { cmd_XY("B","I"); next }
/^[.]br/ { cmd_br(); next }
/^[.]BR / { cmd_XY("B","R"); next }
/^[.]ce[ 0-9]*$/ { cmd_ce(); next }
/^[.]hw/ { cmd_hw(); next }
/^[.]IB / { cmd_XY("I","B"); next }
/^[.]IR / { cmd_XY("I","R"); next }
/^[.]ne/ { cmd_ne(); next }
/^[.]RB / { cmd_XY("R","B"); next }
/^[.]RI / { cmd_XY("R","I"); next }
/^[.]nf/ { cmd_nf(); next }
/^[.]fi/ { cmd_fi(); next }
/^[.]IP/ { cmd_IP(); next }
/^[.]LP/ { cmd_LP(); next }
/^[.]na/ { cmd_na(); next }
/^[.]PP/ { cmd_PP(); next }
/^[.]RE/ { cmd_RE(); next }
/^[.]RS/ { cmd_RS(); next }
/^[.]sp/ { cmd_sp(); next }
/^[.]TP/ { cmd_TP(); next }
/^[.]TS/ { cmd_TS(); next }
/^[.][A-Za-z]/ { cmd_unknown(); next } # any other request: warn and keep as comment
{ print_line(strtohtml($0)) } # ordinary text line
END {
    if (!Initialized) # empty input: no record ever triggered the setup rule
    {
        Initialized = 1
        initialize()
    }
    terminate()
}
# The anchor() function is adapted from my bibtex-to-html.awk file
function anchor(s,type,pattern,offset,prefix,save_label,    name,rstart,rlength)
{
    # Add anchors <A type="....">...</A> around every piece of text in s
    # that matches pattern, and return the resulting string.
    #
    # A non-zero offset discards that many characters from the start of
    # the match, allowing the pattern to contain leading context which
    # goes outside the anchored region.  The prefix is attached to the
    # start of the matched string, inside the value quotes in the
    # anchor.  save_label is accepted for interface compatibility and is
    # only passed along to the recursive call.
    #
    # An empty match is ignored: it would anchor nothing and the
    # recursion on the unchanged remainder would never terminate.
    if (match(s,pattern) && (RLENGTH > 0))
    {
        # need private copies of these globals because recursion will
        # change them; adjust by offset to discard leading context
        rstart = RSTART + offset
        rlength = RLENGTH - offset
        name = substr(s,rstart,rlength)
        sub(/ +at +/,"@",name) # reduce "user at host" to "user@host"
        s = substr(s,1,rstart-1) \
            "<A " type "=\"" prefix name "\">" \
            ((type == "NAME") ? "<STRONG>" : "") \
            substr(s,rstart,rlength) \
            ((type == "NAME") ? "</STRONG>" : "") \
            "</A>" \
            anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label)
    }
    return (s)
}
function begin_toc(    k,n,part)
{
    # Start the table of contents: write its heading and open the
    # top-level list, one markup item per output line, then record that
    # no TOC list item is open yet.
    n = split("<H1>\nTable of contents\n</H1>\n<UL>",part,"\n")
    for (k = 1; k <= n; ++k)
        print_toc(part[k])
    In_TOC_Item = 0
}
# Handle the .ad request by preserving the input line as an HTML comment.
function cmd_ad()
{ # .ad: turn on adjust (flush-left-and-right justification)
cmd_comment($0) # no HTML equivalent
}
function cmd_B(    s)
{
    # .B text ...: output all of the arguments on the current line ($0)
    # in bold (<STRONG>).
    #
    # If the argument list starts with a double quote, the text is
    # everything after that quote, with trailing quotes and blanks
    # removed.  Otherwise the text is the whole remainder of the line:
    # using only $2 would silently drop every word after the first one
    # in, e.g., ".B foo bar".
    end_font()
    if (match($0,/^[.]B *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $0
        sub(/^[.]B[ \t]*/,"",s) # strip the request name
        sub(/[ \t]+$/,"",s) # and trailing white space
    }
    print_line("<STRONG>" strtohtml(s) "</STRONG>")
}
function cmd_BIR(    letter)
{
    # A bare .B, .I, or .R line: close any open font, then emit the
    # equivalent in-line font escape (\fB, \fI, or \fR) through
    # strtohtml() so that it is converted like any other font change.
    end_font()
    letter = substr($0,2,1) # the request letter following the dot
    print_line(strtohtml("\\f" letter))
}
# Handle the .br request: it is treated exactly like a paragraph
# break, by delegating to cmd_PP().
function cmd_br()
{
cmd_PP()
}
function cmd_ce(    k,n)
{
    # .ce nnn: turn on centering for next nnn lines (nnn = 0 turns it off)
    #
    # A bare ".ce" centers one line, which is the troff default.  The
    # request itself is preserved as a comment, and each of the following
    # nnn input lines is emitted followed by <BR>.
    n = (NF >= 2) ? $2 + 0 : 1
    cmd_comment($0)
    if (n > 0)
    {
        # The HTML 3.2 grammar supports <CENTER> ... </CENTER> as a
        # shorthand for the more general <DIV ALIGN=CENTER> ... </DIV>
        # (CENTER can be replaced by LEFT or RIGHT). However, except
        # for amaya (W3C's testbed for HTML 3.2), none of the current
        # browsers support DIV. grail, hotjava, netscape all recognize
        # CENTER. arena, chimera, lynx, and xmosaic do not recognize it
        # either.
        if (HTML == "3.2")
            print_line("<CENTER>")
        for (k = 1; k <= n; ++k)
        {
            # stop at end of input instead of re-emitting the stale $0
            if ((getline) <= 0)
                break
            print_line(strtohtml($0) "<BR>")
        }
        if (HTML == "3.2")
            print_line("</CENTER>")
    }
}
function cmd_comment(s,    body)
{
    # Emit s as a single HTML comment line.  In_Comment is raised while
    # the text is converted so that strtohtml() applies its
    # comment-specific handling, and is lowered again afterwards.
    sub(/^[.']\\"/,"",s) # remove troff comment prefix: it confuses html-pretty
    In_Comment = 1
    body = strtohtml(s)
    print_line("<!-- " body " -->")
    In_Comment = 0
}
function cmd_comment_block(s)
{
    # Turn a whole conditional block into comments: first the opening
    # line s, then each following input line up to and including the
    # line that closes the block (or until input runs out).
    cmd_comment(s)
    In_Comment = 1
    for (;;)
    {
        if ((getline s) <= 0)
            break # end of input (or read error)
        cmd_comment(s)
        if (s ~ /^ *\\\}/)
            break # found end of block
    }
    In_Comment = 0
}
function cmd_I(    s)
{
    # .I text ...: output all of the arguments on the current line ($0)
    # in italics (<EM>).
    #
    # If the argument list starts with a double quote, the text is
    # everything after that quote, with trailing quotes and blanks
    # removed.  Otherwise the text is the whole remainder of the line:
    # using only $2 would silently drop every word after the first one
    # in, e.g., ".I foo bar".
    end_font()
    if (match($0,/^[.]I *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $0
        sub(/^[.]I[ \t]*/,"",s) # strip the request name
        sub(/[ \t]+$/,"",s) # and trailing white space
    }
    print_line("<EM>" strtohtml(s) "</EM>")
}
function cmd_IX()
{
    # .IX index entry lines are simply discarded, together with any
    # continuation lines: as long as the current line ends in a
    # backslash, read (and drop) the next one.
    while (match($0,/\\$/))
    {
        if ((getline) <= 0)
            break
    }
}
function cmd_fi()
{
    # .fi: close the <PRE> environment if one is open; otherwise the
    # request is just preserved as a comment.
    end_font()
    if (!In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("</PRE>")
    In_PRE = 0
}
# Handle the .hw request by preserving the input line as an HTML comment.
function cmd_hw()
{ # .hw word-hyph-en-a-tion ex-cep-tions
cmd_comment($0)
}
function cmd_IP(    k,s)
{
    # .IP [tag [indent]]: start an indented paragraph.
    #
    # A paragraph break is emitted, followed by the tag text (if any) on
    # its own output line, so that labels such as "\(bu" or "1." are no
    # longer silently dropped.  The tag is the first argument: either a
    # double-quoted string or a single blank-delimited word.  The
    # optional indent argument has no HTML equivalent and is ignored.
    end_font()
    PP++
    print_line("<P>")
    s = $0
    sub(/^[.]IP[ \t]*/,"",s) # strip the request name
    if (substr(s,1,1) == "\"")
    {
        s = substr(s,2)
        k = index(s,"\"") # closing quote, if present
        if (k > 0)
            s = substr(s,1,k-1)
    }
    else
        sub(/[ \t].*$/,"",s) # keep only the first word
    if (s != "")
        print_line(strtohtml(s))
}
function cmd_LP()
{
    # .LP: close any open font, count the paragraph, and emit a
    # paragraph break.
    end_font()
    PP += 1
    print_line("<P>")
}
# Handle the .na request by preserving the input line as an HTML comment.
function cmd_na()
{ # .na: no adjust: turn off flush-left-and-right justification, producing ragged-right
cmd_comment($0) # no HTML equivalent
}
# Handle the .ne request by preserving the input line as an HTML comment.
function cmd_ne()
{ # .ne dimen: need dimen vertical space before end of page
# otherwise, force a page break (e.g. to prevent page
# breaks after headings)
cmd_comment($0)
}
function cmd_nf()
{
    # .nf: open a <PRE> environment unless one is already open, in
    # which case the request is just preserved as a comment.
    end_font()
    if (In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("<PRE>")
    In_PRE = 1
}
function cmd_PP()
{
    # .PP: close any open font, count the paragraph, emit a paragraph
    # break, and then end the current .TP list level.
    end_font()
    PP += 1
    # <P> tags are illegal in <PRE>...</PRE> environments, so an empty
    # line is used there instead
    print_line(In_PRE ? "" : "<P>")
    end_TP()
}
function cmd_R(    s)
{
    # .R text ...: output all of the arguments on the current line ($0)
    # in the roman (default) font, i.e. without any font markup.
    #
    # If the argument list starts with a double quote, the text is
    # everything after that quote, with trailing quotes and blanks
    # removed.  Otherwise the text is the whole remainder of the line:
    # using only $2 would silently drop every word after the first one
    # in, e.g., ".R foo bar".
    end_font()
    if (match($0,/^[.]R *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $0
        sub(/^[.]R[ \t]*/,"",s) # strip the request name
        sub(/[ \t]+$/,"",s) # and trailing white space
    }
    print_line(strtohtml(s))
}
function cmd_RE()
{
    # .RE: end the relative indent started by the matching .RS, closing
    # any lists opened inside it and then the <BLOCKQUOTE>.
    end_font()
    if (RSE_Level <= 0)
    {
        # Unbalanced .RE (no .RS is open): emitting </BLOCKQUOTE> here
        # would produce invalid HTML, so just preserve the request as a
        # comment.
        cmd_comment($0)
        return
    }
    if (In_PRE) # should not happen, but some man pages
        cmd_fi() # are irregular
    # unwind list levels down to the level recorded by the .RS
    while (List_Level > RSE_List_Level[RSE_Level])
        end_TP()
    RSE_Level--
    print_line("</BLOCKQUOTE>")
}
function cmd_RS()
{
    # .RS: start a relative indent.  The current list level is saved
    # under a new RSE level (for cmd_RE() to unwind to), a fresh list
    # level is entered, and a <BLOCKQUOTE> is opened.
    end_font()
    RSE_Level++
    RSE_List_Level[RSE_Level] = List_Level
    List_Level++ # new .TP level too
    print_line("<BLOCKQUOTE>")
}
# Handle .SH by delegating to cmd_SH_SS() with the H1 heading tag.
# The local variable s is declared but not used.
function cmd_SH( s)
{ # section heading
cmd_SH_SS("H1")
}
# Handle .SS by delegating to cmd_SH_SS() with the H2 heading tag.
# The local variable s is declared but not used.
function cmd_SS( s)
{ # subsection heading
cmd_SH_SS("H2")
}
# Emit a section (tag "H1") or subsection (tag "H2") heading, with a
# NAME anchor, into the body, and a matching linked entry into the
# table of contents.  The heading text is $0 from its 5th character on,
# with an optional leading and trailing double quote removed.
# Globals updated: H1, H2, SH_SS_count, In_TOC_Item.
function cmd_SH_SS(tag, s)
{ # [sub]section heading
if (!TH_seen) # should not happen, but some man pages are
cmd_TH(substr($0,5)) # irregular
end_font()
# a new heading ends all open indents and lists
while (RSE_Level > 0)
cmd_RE()
while (List_Level > 0)
end_TP()
if (tag == "H1")
{
H1++
if (H1 == 1) # first section: start the table of contents
begin_toc()
if (H2 > 0) # previous section had subsections: close their TOC sub-list
{
print_toc("</LI>")
print_toc("</UL>")
}
H2 = 0
if (H1 > 1)
print_line("<HR>") # a separating horizontal rule is a nice touch
}
else if (tag == "H2")
{
H2++
}
s = substr($0,5)
sub(/^ *\"/,"",s)
sub(/\" *$/,"",s)
s = strtohtml(s)
# anchor label suffix: ".<section>" or ".<section>.<subsection>"
SH_SS_count = "." H1
if (H2 > 0)
SH_SS_count = SH_SS_count "." H2
print_line("<" tag ">")
print_line("<A NAME=\"HDR" SH_SS_count "\">")
print_line(s)
print_line("</A>")
print_line("</" tag ">")
# close the previous TOC item, except when the first subsection is
# about to be nested inside it
if (In_TOC_Item && (H2 != 1))
print_toc("</LI>")
if (H2 == 1) # first subsection: open the nested TOC list
print_toc("<UL>")
In_TOC_Item = 1
print_toc("<LI>")
print_toc("<A HREF=\"#HDR" SH_SS_count "\">")
print_toc(s)
print_toc("</A>")
}
# Handle the .sp request by preserving the input line as an HTML comment.
function cmd_sp()
{ # .sp nnn: vertical space
cmd_comment($0) # no sensible HTML equivalent
}
function cmd_TH(    line)
{
    # .TH: emit the document prologue (<HTML>, <HEAD> with <TITLE> and
    # a "made" link, and the opening <BODY>), and record in TH_seen that
    # this has been done.
    #
    # The title is the current line ($0) from its 4th character on,
    # after joining any backslash-continued lines.  NB: line is a local
    # variable; cmd_SH_SS() passes an argument for it, but the value is
    # always replaced by $0.
    end_font()
    print_line("<HTML>")
    print_line("<HEAD>")
    print_line("<TITLE>")
    line = $0
    while (line ~ /\\$/)
    {
        line = substr(line,1,length(line)-1) # drop the trailing backslash
        # At end of input getline leaves $0 unchanged; appending it
        # again would re-create the trailing backslash and loop forever.
        if ((getline) <= 0)
            break
        line = line $0
    }
    print_line(strtohtml(substr(line,4)))
    print_line("</TITLE>")
    print_line("<LINK REV=\"made\" HREF=\"mailto:" LOGNAME "@" HOSTNAME "\">")
    print_line("</HEAD>")
    print_line("")
    print_line("<BODY>")
    print_line("")
    TH_seen = 1
}
# Handle .TP: read the following input line as the item label and emit
# it as an item of a list at the current List_Level, opening a new list
# first when the current level has no items yet.  The list is a <UL>
# when the label of its first item is exactly "\(bu", and a <DL>
# otherwise.
# Globals updated: List_Level, Item_Count[], List_Name[], List_Item[].
function cmd_TP()
{
end_font()
getline # this is the item label, usually "\(bu" or ".B ..."
if (Item_Count[List_Level] == 0) # then first item of new list
{
List_Level++
Item_Count[List_Level] = 0
if ($0 == "\\(bu")
{
List_Name[List_Level] = "UL"
List_Item[List_Level] = "LI"
}
else
{
List_Name[List_Level] = "DL"
List_Item[List_Level] = "DT"
}
# always true here, since the count was just reset above
if (Item_Count[List_Level] == 0)
print_line("<" List_Name[List_Level] ">")
}
Item_Count[List_Level]++
if (List_Name[List_Level] == "DL")
{
if (Item_Count[List_Level] > 1) # close the previous item's description
print_line("</DD>")
print_line("<DT>")
# the label may itself be a font request: dispatch to its handler
if ($0 ~ /^[.]B /)
cmd_B()
else if ($0 ~ /^[.]I /)
cmd_I()
else if ($0 ~ /^[.]R /)
cmd_R()
else if ($0 ~ /^[.]BR/)
cmd_XY("B","R")
else if ($0 ~ /^[.]BI/)
cmd_XY("B","I")
else if ($0 ~ /^[.]IB/)
cmd_XY("I","B")
else if ($0 ~ /^[.]IR/)
cmd_XY("I","R")
else if ($0 ~ /^[.]RB/)
cmd_XY("R","B")
else if ($0 ~ /^[.]RI/)
cmd_XY("R","I")
else
print_line(strtohtml($0))
end_font()
if (In_PRE) # should not happen, but some man pages
cmd_fi() # are irregular
print_line("</DT>")
print_line("<DD>")
}
else # must be <UL> <LI> ... </LI> </UL> type list
{
# the label line itself (the bullet) is not output for this list type
if (Item_Count[List_Level] > 1)
print_line("</LI>")
print_line("<LI>")
}
}
function cmd_TS(    tbl_nroff_cmd)
{
    # .TS ... .TE: tables are not translated to HTML.  Instead the table
    # source is saved in TBLFILE, formatted externally, and the
    # formatted text is included as a preformatted environment.

    # Copy the table to a temporary file: the current (.TS) line, then
    # every following line up to and including the .TE line.
    do
    {
        print $0 >TBLFILE
        if ($0 ~ /^[.]TE/) # then end of table found
            break
    } while ((getline) > 0)
    close (TBLFILE)

    # Run tbl, nroff, and col to convert the table to formatted text
    tbl_nroff_cmd = "tbl " TBLFILE " | nroff -man | col -b"
    print_line("<PRE>")
    while ((tbl_nroff_cmd | getline) > 0)
        print_line(strtohtml($0))
    print_line("</PRE>")
    close (tbl_nroff_cmd)
    delete_file(TBLFILE)
}
function cmd_unknown(    msg)
{
    # Any request without a dedicated handler: close open fonts, report
    # the request on the warning channel, and preserve the input line
    # as an HTML comment.
    end_font()
    msg = "Unrecognized nroff/troff command in [" $0 "] changed to comment"
    warning(msg)
    cmd_comment($0)
}
function cmd_XY(x,y,    font,k)
{
    # Alternating-font requests (.BI, .BR, .IB, .IR, .RB, .RI): the
    # arguments are output without separating spaces, even-numbered
    # fields ($2, $4, ...) in font x and odd-numbered ones in font y.
    # Quoted arguments are first protected so that each one is a
    # single field.
    end_font()
    protect_quoted_args()
    k = 2
    while (k <= NF)
    {
        if (k % 2)
            font = Font_Map[y]
        else
            font = Font_Map[x]
        printf("%s%s%s", html_font_begintag(font), strtohtml(unprotect_quoted_arg($k)), \
            html_font_endtag(font)) > TMPFILE
        k++
    }
    print_line("") # terminate the output line
}
function define(name,    regexp,value,quoted)
{
    # Record a troff string definition found on the current line ($0).
    # Typical values:
    #     .if n .ds Bi BibTeX
    #     .el .ds Bi BibTeX
    # Macro used as \*(Bi (or \*x for a one-character name), but stored
    # in Macro[] under a regexp matching that reference.
    #
    # The value is located relative to the ".ds" request rather than by
    # searching for the first occurrence of name at a fixed offset, so
    # names of any length, and names that also occur earlier in the line,
    # are handled.  Blanks after the name and one leading double quote
    # are dropped.
    value = $0
    if (match(value,/[.]ds[ \t]+/))
        value = substr(value,RSTART+RLENGTH)
    if (substr(value,1,length(name)) == name)
        value = substr(value,length(name)+1)
    sub(/^[ \t]+/,"",value)
    sub(/^"/,"",value)

    # The value is inserted into already-converted HTML, so protect the
    # SGML special characters now ...
    gsub(/&/,"\\&amp;",value)
    gsub(/</,"\\&lt;",value)
    gsub(/>/,"\\&gt;",value)
    # ... and, because the value is later used as a gsub() replacement
    # string, make each & literal there.
    gsub(/&/,"\\\\&",value)

    # Escape regexp metacharacters in the name (e.g. "C+")
    quoted = name
    gsub(/[][\\.^$*+?(){}|]/,"\\\\&",quoted)
    regexp = "\\\\\\*" ((length(name) == 1) ? "" : "\\(") quoted
    Macro[regexp] = value
}
function delete_file(s,    cmd)
{
    # Remove the file named s by running rm through the shell.
    cmd = "/bin/rm -f " s
    system(cmd)
}
function end_font()
{
    # Close every font that is still open, innermost first, leaving
    # Font_Level at zero.
    while (Font_Level > 0)
    {
        print_line(html_font_endtag(HTML_Font_Name[Font_Level]))
        Font_Level--
    }
}
function end_toc()
{
    # Finish the table of contents: close the open list(s), add a
    # separating rule, and close the TOC file.
    #
    # If the last section has subsections (H2 > 0), their nested
    # sub-list is still open and must be closed first; cmd_SH_SS() only
    # does that when another section heading follows.
    if (H2 > 0)
    {
        print_toc("</LI>")
        print_toc("</UL>")
    }
    print_toc("</LI>")
    print_toc("</UL>")
    print_toc("<HR>")
    close (TOCFILE)
}
function end_TP(    is_dl)
{
    # Leave the current list level: if the level holds any items, close
    # the last item and the list itself; then reset the level's item
    # count and step back one level (never below zero).
    if (Item_Count[List_Level] > 0)
    {
        is_dl = (List_Name[List_Level] == "DL")
        print_line(is_dl ? "</DD>" : "</LI>")
        print_line(is_dl ? "</DL>" : "</UL>")
    }
    Item_Count[List_Level] = 0
    if (List_Level > 0)
        List_Level--
}
# Replace every troff font escape \fX (X one of B C I P R S T) in s by
# the corresponding HTML tag(s), and return the result.  The open font
# is tracked across calls in the globals Font_Level and
# HTML_Font_Name[]; fonts that Font_Map maps to the empty string
# produce no tag.
function font_sub(s, tag)
{
while (match(s,/\\f[BCIPRST]/))
{
if (substr(s,RSTART+2,1) == "P") # revert to previous font
{
tag = html_font_endtag(HTML_Font_Name[Font_Level])
if (Font_Level > 0)
Font_Level--
}
else # set explicit font
{
Font_Level++
HTML_Font_Name[Font_Level] = Font_Map[substr(s,RSTART+2,1)]
tag = html_font_begintag(HTML_Font_Name[Font_Level])
# Handle ...\fB...\fR... style by ending previous font
if (Font_Level > 1)
{
# the new font replaces the previous one instead of nesting inside
# it, so Font_Level does not grow beyond 1 here
tag = html_font_endtag(HTML_Font_Name[Font_Level-1]) tag
HTML_Font_Name[Font_Level-1] = HTML_Font_Name[Font_Level]
Font_Level--
}
}
# splice the tag in place of the 3-character escape sequence
s = substr(s,1,RSTART-1) tag substr(s,RSTART+3)
}
return (s)
}
function html_font_begintag(name)
{
    # Return the opening tag <name>, or nothing at all when name is
    # empty (a font without HTML markup).
    return (name == "") ? "" : ("<" name ">")
}
function html_font_endtag(name)
{
    # Return the closing tag </name>, or nothing at all when name is
    # empty (a font without HTML markup).
    return (name == "") ? "" : ("</" name ">")
}
function initialize(    cmd)
{
    # One-time setup: version strings, user/host/date information, the
    # HTML level, the font and macro translation tables, temporary file
    # names, and the URL/e-mail patterns; finally the output header is
    # written.

    # Change these two lines whenever the program is modified
    VERSION_NUMBER = "1.06"
    VERSION_DATE = "[24-Oct-1997]"
    VERSION = "Version " VERSION_NUMBER " " VERSION_DATE

    # Each command contributes one line of output; close the pipes so
    # that they do not stay open for the rest of the run.
    cmd = "echo $LOGNAME"
    cmd | getline LOGNAME
    close (cmd)
    cmd = "hostname"
    cmd | getline HOSTNAME
    close (cmd)
    cmd = "date"
    cmd | getline DATE
    close (cmd)

    if (HTML == "")
        HTML = 2
    if ((HTML != 2) && (HTML != 3) && (HTML != "3.2"))
    {
        warning("Unsupported HTML level " HTML " requested: defaulting to HTML level 2")
        HTML = 2
    }

    # troff font letter -> HTML element name (empty: no markup)
    Font_Map["B"] = "STRONG"
    Font_Map["C"] = "TT"
    Font_Map["I"] = "EM"
    Font_Map["R"] = ""
    Font_Map["S"] = "" # cannot map symbol font yet
    Font_Map["T"] = "TT"

    # Macro[] maps a regexp matching a troff escape to its replacement
    # (in gsub() replacement syntax, hence the \& for a literal &)
    Macro["\\\\e"] = "\\"
    if (HTML == 2)
        Macro["\\\\0"] = "\\&#160;" # change non-breakable space to numeric entity
    else if (HTML >= 3)
        Macro["\\\\0"] = "\\&nbsp;" # can finally use named entity
    else # the level is concatenated into the message: warning() takes one argument
        warning("No conversion implemented for \\\\0 (non-breakable space) in HTML level " HTML)
    TOCFILE = "/tmp/man2html.toc"
    TBLFILE = "/tmp/man2html.tbl"
    TMPFILE = "/tmp/man2html.tmp"
    H1 = 0
    H2 = 0
    Macro["\\\\\\(bu"] = "\\&#164;"
    Macro["\\\\\\(em"] = "---"
    Macro["\\\\\\(en"] = "--"

    # The following fragment for setting URL_xxx variables
    # is borrowed intact from my bibtex-to-html.awk file:
    #
    # According to Internet RFC 1614 (May 1994), a URL is
    # defined in the document T. Berners-Lee, ``Uniform
    # Resource Locators'', March 1993, available at URL
    # ftp://info.cern.ch/pub/ietf/url4.ps. Unfortunately,
    # that address is no longer valid. However, I was able to
    # track down pointers from http://www.w3.org/ to locate a
    # suitable description in Internet RFC 1630 (June 1994).
    # NB: We additionally disallow & in a URL because it is
    # needed in SGML entities "&name;". We also disallow =
    # and | because these are commonly used in \path=...= and
    # \path|...| strings in BibTeX files. These restrictions
    # could be removed if we went to the trouble of first
    # encoding these special characters in %xy hexadecimal
    # format, but they are rare enough that I am not going to
    # do so for now. The worst that will happen from this
    # decision is that an occasional URL in a BibTeX file will
    # be missing a surrounding anchor.
    # Bug fix [24-Oct-1997]: Add < and > to the set of excluded
    # characters, to avoid incorrectly including SGML markup inside a
    # URL. Before this fix, "\fChttp://www/\fP" got translated
    # incorrectly to
    # <TT><A HREF="http://www/</TT>">http://www/</TT></A>
    # instead of the correct
    # <TT><A HREF="http://www">http://www</A></TT>
    URL_PATTERN = "[A-Za-z]+://[^ \",&=|<>]+"
    URL_OFFSET = 0
    URL_PREFIX = ""
    URL_SAVE_LABEL = 0
    E_MAIL_PATTERN = "[A-Za-z0-9_-]+@[A-Za-z0-9-]+([.][A-Za-z0-9-]+)*"
    E_MAIL_OFFSET = 0
    E_MAIL_PREFIX = "mailto:"
    E_MAIL_SAVE_LABEL = 0
    print_header()
}
function print_header(    name)
{
    # Write the leading comment lines that identify the generator and
    # the input file, followed by the DOCTYPE declaration for the
    # selected HTML level.
    #
    # The file name appears inside an HTML comment, so it is converted
    # in comment mode: strtohtml() then hides "--" pairs and does not
    # wrap URL- or e-mail-like text in anchors.
    In_Comment = 1
    name = strtohtml(FILENAME)
    In_Comment = 0
    print_line("<!-- Warning: Do NOT edit this file. -->")
    print_line("<!-- It was created automatically by man2html.awk " VERSION " on " DATE " -->")
    print_line("<!-- from the file " name " at " HOSTNAME " -->")
    print_line("")
    if (HTML == 2)
        print_line("<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">")
    else if (HTML == 3) # We need level 3 HTML only because of our use of &nbsp; and &shy;
        print_line("<!DOCTYPE HTML public \"-//IETF//DTD HTML 3.0//EN\">")
    else if (HTML == "3.2") # HTML 3.2 released 5-Nov-1996 at http://www.w3.org/pub/WWW
        print_line("<!DOCTYPE HTML public \"-//W3C//DTD HTML 3.2//EN\">")
}
# Write s as one line to the file named by TMPFILE, where the HTML
# body is collected (terminate() reads that file back).
function print_line(s)
{
print s >TMPFILE
}
# Write s as one line to the file named by TOCFILE, where the table
# of contents is collected (terminate() reads that file back).
function print_toc(s)
{
print s >TOCFILE
}
function protect_quoted_args(    c,inside,k,n,out)
{
    # Make each double-quoted argument in $0 a single field: every
    # space between a pair of double quotes is replaced by the DEL
    # character (octal 177) and $0 is reassigned, which re-splits the
    # fields.  Lines without any double quote are left untouched.
    # unprotect_quoted_arg() undoes the replacement.
    if (index($0,"\"") == 0)
        return
    n = length($0)
    out = ""
    inside = 0
    for (k = 1; k <= n; ++k)
    {
        c = substr($0,k,1)
        if (c == "\"")
            inside = !inside
        else if (inside && (c == " "))
            c = "\177"
        out = out c
    }
    $0 = out
}
# Convert one line of nroff/troff text s to HTML and return it.
# The steps run in this order: simple troff escapes are removed or
# replaced; the SGML special characters & < > are protected; literal
# spaces and discretionary hyphens are handled according to the HTML
# level; double quotes (or, inside comments, "--" pairs) are protected;
# the Macro[] table is applied; font escapes are converted by
# font_sub(); and, outside comments, URLs and e-mail addresses are
# wrapped in anchors.  The global In_Comment selects comment mode.
function strtohtml(s, name)
{
gsub(/\\$/,"",s) # discard backslash-newline
gsub(/\\-/,"-",s) # show troff minus as ASCII minus
gsub(/\\[&]/,"",s) # remove no-op macros
# gsub(/\\[|]/," ",s) # change thin space to space
gsub(/\\[|]/,"",s) # delete thin space (nroff does too)
gsub(/[&]/,"\\&amp;",s) # protect 3 or 4
gsub(/</,"\\&lt;",s) # special SGML
gsub(/>/,"\\&gt;",s) # characters
if (HTML == 2)
{
gsub(/\\ /,"\\&#160;",s)# represent literal space by numeric entity
gsub(/\\%/,"",s) # squeeze out discretionary hyphens
}
else if (HTML >= 3)
{
gsub(/\\ /,"\\&nbsp;",s) # preserve literal spaces
# NB: several browsers fail to implement soft hyphen properly: they show
# it as an explicit hyphen when the word is not broken at end of line,
# instead of discarding it. We translate it correctly, and hope that
# broken browsers eventually get fixed, sigh...
gsub(/\\%/,"\\&shy;",s) # discretionary hyphen -> soft hyphen
}
if (In_Comment)
gsub(/--/,"__",s) # must hide -- pairs to avoid grammar error
else if (HTML == "3.2")
gsub(/\"/,"\\&#34;",s) # &quot; was left out of HTML 3.2, sigh...
else
gsub(/\"/,"\\&quot;",s) # but other versions, and SGML, have &quot;
# It is curious that browsers can display a bullet, but there is no
# HTML markup to represent it, and it is absent from the standard
# ISO8859-1 fonts
# gsub(/\\\(bu/,"\\&#164;",s) # change bullets to general currency sign
# &curren; but use numeric code because
# xmosaic does not recognize it
# Each Macro[] index is a regexp and each value a gsub() replacement
# string.
for (name in Macro) # substitute macro names
gsub(name,Macro[name],s)
s = font_sub(s)
gsub(/\\\\/,"\\",s) # reduce troff doubled backslash to single HTML one
# if (index(s,"\\") > 0) # check for anything we missed
# warning("Possible unrecognized nroff/troff markup in [" s "]")
if (!In_Comment) # no link inside comment; otherwise, browser shows text
{
s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
s = anchor(s,"HREF",E_MAIL_PATTERN,E_MAIL_OFFSET,E_MAIL_PREFIX, \
E_MAIL_SAVE_LABEL)
}
return (s)
}
function terminate(    x,y,found_h1)
{
    # Finish the document and assemble the final output on stdout: the
    # body collected in TMPFILE, with the table of contents from TOCFILE
    # inserted immediately before the first <H1> line.  Both temporary
    # files are removed afterwards.
    #
    # The getline redirections are parenthesized because an
    # unparenthesized "getline x < file > 0" is ambiguous in POSIX awk.
    print_line("</BODY>")
    print_line("</HTML>")
    close (TMPFILE)
    end_toc()

    # copy everything that precedes the first section heading
    found_h1 = 0
    while ((getline x < TMPFILE) > 0)
    {
        if (x == "<H1>")
        {
            found_h1 = 1
            break
        }
        print x
    }

    if (found_h1)
    {
        while ((getline y < TOCFILE) > 0)
            print y
        print x # the pending <H1> line
        while ((getline x < TMPFILE) > 0)
            print x
    }
    # Otherwise the page has no section heading: the whole body has
    # already been copied, so neither a table of contents nor a repeated
    # last line is written.

    close (TOCFILE)
    delete_file(TOCFILE)
    close (TMPFILE)
    delete_file(TMPFILE)
}
function unprotect_quoted_arg(s)
{
    # Undo protect_quoted_args() for one argument: drop one leading and
    # one trailing double quote, if present, and turn each DEL character
    # (octal 177) back into a space.
    if (substr(s,1,1) == "\"")
        s = substr(s,2)
    if (substr(s,length(s),1) == "\"")
        s = substr(s,1,length(s)-1)
    gsub(/\177/," ",s)
    return (s)
}
# Report message on /dev/stderr, prefixed with the current input file
# name and the record number within that file (FILENAME:FNR:%%).
function warning(message)
{
print FILENAME ":" FNR ":%%" message >"/dev/stderr"
}
### ====================================================================
### @Awk-file{
### author = "Nelson H. F. Beebe",
### version = "1.06",
### date = "24 October 1997",
### time = "21:34:34 MDT",
### filename = "man2html.awk",
### address = "Center for Scientific Computing
### University of Utah
### Department of Mathematics, 105 JWB
### 155 S 1400 E RM 233
### Salt Lake City, UT 84112-0090
### USA",
### telephone = "+1 801 581 5254",
### FAX = "+1 801 581 4148",
### URL = "http://www.math.utah.edu/~beebe",
### checksum = "01400 968 2975 23193",
### email = "beebe@math.utah.edu (Internet)",
### codetable = "ISO/ASCII",
### keywords = "nroff, troff, UNIX manual page",
### supported = "yes",
### docstring = "This program converts UNIX manual pages
### in nroff/troff markup to strictly-conformant
### HTML 2.0, 3.0, or 3.2. [Actually, only two
### HTML 3.x entities (`&nbsp;' and `&shy;')
### are used, and those rarely; otherwise, the
### syntax conforms strictly to HTML 2.0.]
###
### Usage:
### nawk -f man2html.awk [HTML=2|3|3.2] \
### manpage-file >html-file
###
### The single option, HTML=2, HTML=3, or
### HTML=3.2, selects the HTML grammar level.
### The default is HTML=2.
###
### This program is normally run via a shell
### wrapper that offers an option for setting the
### output file name. It has been used to
### successfully convert entire man-page
### collections on several UNIX systems to HTML
### form for convenient World-Wide Web browser
### access.
###
### Of those nroff/troff commands defined in the
### -man format used for UNIX manual pages, only
### the most commonly-used ones are supported;
### unrecognized ones will be warned about, and
### preserved as HTML comments in the output.
###
### UNIX man pages tend to be written in a
### highly-stylized fashion that we apply
### heuristics to in order to recover high-level
### HTML structure from low-level nroff/troff
### markup. Deviations from conventional
### man-page writing practice will likely result
### in less-than-perfect translation to HTML.
###
### Although there are several other `man2html'
### translators available on the Internet, this
### one is entirely of my own authorship, with no
### code borrowing from anywhere else.
###
### The checksum field above contains a CRC-16
### checksum as the first value, followed by the
### equivalent of the standard UNIX wc (word
### count) utility output of lines, words, and
### characters. This is produced by Robert
### Solovay's checksum utility.",
### }
### ====================================================================
# ----------------------------------------------------------------------
# Main pattern/action rules.  Rules are tried in order and nearly all of
# them end with "next", so the first matching rule wins; specific
# patterns must therefore precede the more general ones.
#
# Initialization is deferred to the first input record instead of being
# done in BEGIN: operand assignments such as HTML=3 (the documented
# usage) are only processed after BEGIN has run, and FILENAME is not yet
# set in BEGIN.  The END rule covers the empty-input case.
# ----------------------------------------------------------------------
!Initialized { Initialized = 1; initialize() }
/^[.]ie +t +[.]ds/ { getline } # fall through: next line should be .el
/^[.]el +[.]ds/ { define($3); next }
/^[.']\\"/ { cmd_comment($0); next } # save comments
/^[.]if +n *\\\{/ { cmd_comment($0); next }
/^[.]if +t *\\\{/ { cmd_comment_block($0); next } # convert troff directives to comments
/^ *\\\}/ { cmd_comment($0); next }
/^[.]if +t/ { cmd_comment($0); next } # convert troff directives to comments
/^[.]if +n +[.]ds/ { define($4); next }
/^[.]if +n +[.]ti/ { cmd_comment($0); next } # convert nroff spacing directives to comments
/^[.]ie +n +[.]ds/ { define($4); next }
/^[.]SH/ { cmd_SH(); next }
/^[.]SS/ { cmd_SS(); next }
/^[.]TH/ { cmd_TH(); next }
/^[.]B / { cmd_B(); next }
/^[.]I / { cmd_I(); next }
/^[.]IX / { cmd_IX(); next }
/^[.]R / { cmd_R(); next }
/^[.]ad/ { cmd_ad(); next }
/^[.][BIR]$/ { cmd_BIR(); next }
/^[.]BI / { cmd_XY("B","I"); next }
/^[.]br/ { cmd_br(); next }
/^[.]BR / { cmd_XY("B","R"); next }
/^[.]ce[ 0-9]*$/ { cmd_ce(); next }
/^[.]hw/ { cmd_hw(); next }
/^[.]IB / { cmd_XY("I","B"); next }
/^[.]IR / { cmd_XY("I","R"); next }
/^[.]ne/ { cmd_ne(); next }
/^[.]RB / { cmd_XY("R","B"); next }
/^[.]RI / { cmd_XY("R","I"); next }
/^[.]nf/ { cmd_nf(); next }
/^[.]fi/ { cmd_fi(); next }
/^[.]IP/ { cmd_IP(); next }
/^[.]LP/ { cmd_LP(); next }
/^[.]na/ { cmd_na(); next }
/^[.]PP/ { cmd_PP(); next }
/^[.]RE/ { cmd_RE(); next }
/^[.]RS/ { cmd_RS(); next }
/^[.]sp/ { cmd_sp(); next }
/^[.]TP/ { cmd_TP(); next }
/^[.]TS/ { cmd_TS(); next }
/^[.][A-Za-z]/ { cmd_unknown(); next } # any other request: warn and keep as comment
{ print_line(strtohtml($0)) } # ordinary text line
END {
    if (!Initialized) # empty input: no record ever triggered the setup rule
    {
        Initialized = 1
        initialize()
    }
    terminate()
}
# The anchor() function is adapted from my bibtex-to-html.awk file
function anchor(s,type,pattern,offset,prefix,save_label,    name,rstart,rlength)
{
    # Add anchors <A type="....">...</A> around every piece of text in s
    # that matches pattern, and return the resulting string.
    #
    # A non-zero offset discards that many characters from the start of
    # the match, allowing the pattern to contain leading context which
    # goes outside the anchored region.  The prefix is attached to the
    # start of the matched string, inside the value quotes in the
    # anchor.  save_label is accepted for interface compatibility and is
    # only passed along to the recursive call.
    #
    # An empty match is ignored: it would anchor nothing and the
    # recursion on the unchanged remainder would never terminate.
    if (match(s,pattern) && (RLENGTH > 0))
    {
        # need private copies of these globals because recursion will
        # change them; adjust by offset to discard leading context
        rstart = RSTART + offset
        rlength = RLENGTH - offset
        name = substr(s,rstart,rlength)
        sub(/ +at +/,"@",name) # reduce "user at host" to "user@host"
        s = substr(s,1,rstart-1) \
            "<A " type "=\"" prefix name "\">" \
            ((type == "NAME") ? "<STRONG>" : "") \
            substr(s,rstart,rlength) \
            ((type == "NAME") ? "</STRONG>" : "") \
            "</A>" \
            anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label)
    }
    return (s)
}
function begin_toc(    k,n,part)
{
    # Start the table of contents: write its heading and open the
    # top-level list, one markup item per output line, then record that
    # no TOC list item is open yet.
    n = split("<H1>\nTable of contents\n</H1>\n<UL>",part,"\n")
    for (k = 1; k <= n; ++k)
        print_toc(part[k])
    In_TOC_Item = 0
}
# Handle the .ad request by preserving the input line as an HTML comment.
function cmd_ad()
{ # .ad: turn on adjust (flush-left-and-right justification)
cmd_comment($0) # no HTML equivalent
}
function cmd_B(    s)
{
    # .B text ...: output all of the arguments on the current line ($0)
    # in bold (<STRONG>).
    #
    # If the argument list starts with a double quote, the text is
    # everything after that quote, with trailing quotes and blanks
    # removed.  Otherwise the text is the whole remainder of the line:
    # using only $2 would silently drop every word after the first one
    # in, e.g., ".B foo bar".
    end_font()
    if (match($0,/^[.]B *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $0
        sub(/^[.]B[ \t]*/,"",s) # strip the request name
        sub(/[ \t]+$/,"",s) # and trailing white space
    }
    print_line("<STRONG>" strtohtml(s) "</STRONG>")
}
function cmd_BIR(    letter)
{
    # A bare .B, .I, or .R line: close any open font, then emit the
    # equivalent in-line font escape (\fB, \fI, or \fR) through
    # strtohtml() so that it is converted like any other font change.
    end_font()
    letter = substr($0,2,1) # the request letter following the dot
    print_line(strtohtml("\\f" letter))
}
# Handle the .br request: it is treated exactly like a paragraph
# break, by delegating to cmd_PP().
function cmd_br()
{
cmd_PP()
}
function cmd_ce(    k,n)
{
    # .ce nnn: turn on centering for next nnn lines (nnn = 0 turns it off)
    #
    # A bare ".ce" centers one line, which is the troff default.  The
    # request itself is preserved as a comment, and each of the following
    # nnn input lines is emitted followed by <BR>.
    n = (NF >= 2) ? $2 + 0 : 1
    cmd_comment($0)
    if (n > 0)
    {
        # The HTML 3.2 grammar supports <CENTER> ... </CENTER> as a
        # shorthand for the more general <DIV ALIGN=CENTER> ... </DIV>
        # (CENTER can be replaced by LEFT or RIGHT). However, except
        # for amaya (W3C's testbed for HTML 3.2), none of the current
        # browsers support DIV. grail, hotjava, netscape all recognize
        # CENTER. arena, chimera, lynx, and xmosaic do not recognize it
        # either.
        if (HTML == "3.2")
            print_line("<CENTER>")
        for (k = 1; k <= n; ++k)
        {
            # stop at end of input instead of re-emitting the stale $0
            if ((getline) <= 0)
                break
            print_line(strtohtml($0) "<BR>")
        }
        if (HTML == "3.2")
            print_line("</CENTER>")
    }
}
function cmd_comment(s,    body)
{
    # Emit s as a single HTML comment line.  In_Comment is raised while
    # the text is converted so that strtohtml() applies its
    # comment-specific handling, and is lowered again afterwards.
    sub(/^[.']\\"/,"",s) # remove troff comment prefix: it confuses html-pretty
    In_Comment = 1
    body = strtohtml(s)
    print_line("<!-- " body " -->")
    In_Comment = 0
}
function cmd_comment_block(s)
{
    # Turn a whole conditional block into comments: first the opening
    # line s, then each following input line up to and including the
    # line that closes the block (or until input runs out).
    cmd_comment(s)
    In_Comment = 1
    for (;;)
    {
        if ((getline s) <= 0)
            break # end of input (or read error)
        cmd_comment(s)
        if (s ~ /^ *\\\}/)
            break # found end of block
    }
    In_Comment = 0
}
function cmd_I(    s)
{
    # .I text ...: output all of the arguments on the current line ($0)
    # in italics (<EM>).
    #
    # If the argument list starts with a double quote, the text is
    # everything after that quote, with trailing quotes and blanks
    # removed.  Otherwise the text is the whole remainder of the line:
    # using only $2 would silently drop every word after the first one
    # in, e.g., ".I foo bar".
    end_font()
    if (match($0,/^[.]I *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $0
        sub(/^[.]I[ \t]*/,"",s) # strip the request name
        sub(/[ \t]+$/,"",s) # and trailing white space
    }
    print_line("<EM>" strtohtml(s) "</EM>")
}
function cmd_IX()
{
    # .IX index entry lines are simply discarded, together with any
    # continuation lines: as long as the current line ends in a
    # backslash, read (and drop) the next one.
    while (match($0,/\\$/))
    {
        if ((getline) <= 0)
            break
    }
}
function cmd_fi()
{
    # .fi: close the <PRE> environment if one is open; otherwise the
    # request is just preserved as a comment.
    end_font()
    if (!In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("</PRE>")
    In_PRE = 0
}
# Handle the .hw request by preserving the input line as an HTML comment.
function cmd_hw()
{ # .hw word-hyph-en-a-tion ex-cep-tions
cmd_comment($0)
}
function cmd_IP(    k,s)
{
    # .IP [tag [indent]]: start an indented paragraph.
    #
    # A paragraph break is emitted, followed by the tag text (if any) on
    # its own output line, so that labels such as "\(bu" or "1." are no
    # longer silently dropped.  The tag is the first argument: either a
    # double-quoted string or a single blank-delimited word.  The
    # optional indent argument has no HTML equivalent and is ignored.
    end_font()
    PP++
    print_line("<P>")
    s = $0
    sub(/^[.]IP[ \t]*/,"",s) # strip the request name
    if (substr(s,1,1) == "\"")
    {
        s = substr(s,2)
        k = index(s,"\"") # closing quote, if present
        if (k > 0)
            s = substr(s,1,k-1)
    }
    else
        sub(/[ \t].*$/,"",s) # keep only the first word
    if (s != "")
        print_line(strtohtml(s))
}
function cmd_LP()
{
    # .LP: close any open font, count the paragraph, and emit a
    # paragraph break.
    end_font()
    PP += 1
    print_line("<P>")
}
# Handle the .na request by preserving the input line as an HTML comment.
function cmd_na()
{ # .na: no adjust: turn off flush-left-and-right justification, producing ragged-right
cmd_comment($0) # no HTML equivalent
}
# Handle the .ne request by preserving the input line as an HTML comment.
function cmd_ne()
{ # .ne dimen: need dimen vertical space before end of page
# otherwise, force a page break (e.g. to prevent page
# breaks after headings)
cmd_comment($0)
}
function cmd_nf()
{
    # .nf: open a <PRE> environment unless one is already open, in
    # which case the request is just preserved as a comment.
    end_font()
    if (In_PRE)
    {
        cmd_comment($0)
        return
    }
    print_line("<PRE>")
    In_PRE = 1
}
function cmd_PP()
{
    # .PP: close any open font, count the paragraph, emit a paragraph
    # break, and then end the current .TP list level.
    end_font()
    PP += 1
    # <P> tags are illegal in <PRE>...</PRE> environments, so an empty
    # line is used there instead
    print_line(In_PRE ? "" : "<P>")
    end_TP()
}
function cmd_R(    s)
{
    # .R text ...: output all of the arguments on the current line ($0)
    # in the roman (default) font, i.e. without any font markup.
    #
    # If the argument list starts with a double quote, the text is
    # everything after that quote, with trailing quotes and blanks
    # removed.  Otherwise the text is the whole remainder of the line:
    # using only $2 would silently drop every word after the first one
    # in, e.g., ".R foo bar".
    end_font()
    if (match($0,/^[.]R *\"/))
    {
        s = substr($0,RSTART+RLENGTH)
        gsub(/[" ]*$/,"",s)
    }
    else
    {
        s = $0
        sub(/^[.]R[ \t]*/,"",s) # strip the request name
        sub(/[ \t]+$/,"",s) # and trailing white space
    }
    print_line(strtohtml(s))
}
function cmd_RE()
{
    # .RE: end the relative indent started by the matching .RS, closing
    # any lists opened inside it and then the <BLOCKQUOTE>.
    end_font()
    if (RSE_Level <= 0)
    {
        # Unbalanced .RE (no .RS is open): emitting </BLOCKQUOTE> here
        # would produce invalid HTML, so just preserve the request as a
        # comment.
        cmd_comment($0)
        return
    }
    if (In_PRE) # should not happen, but some man pages
        cmd_fi() # are irregular
    # unwind list levels down to the level recorded by the .RS
    while (List_Level > RSE_List_Level[RSE_Level])
        end_TP()
    RSE_Level--
    print_line("</BLOCKQUOTE>")
}
function cmd_RS()
{
    # .RS: start a relative indent.  The current list level is saved
    # under a new RSE level (for cmd_RE() to unwind to), a fresh list
    # level is entered, and a <BLOCKQUOTE> is opened.
    end_font()
    RSE_Level++
    RSE_List_Level[RSE_Level] = List_Level
    List_Level++ # new .TP level too
    print_line("<BLOCKQUOTE>")
}
# Handle .SH by delegating to cmd_SH_SS() with the H1 heading tag.
# The local variable s is declared but not used.
function cmd_SH( s)
{ # section heading
cmd_SH_SS("H1")
}
# Handle .SS by delegating to cmd_SH_SS() with the H2 heading tag.
# The local variable s is declared but not used.
function cmd_SS( s)
{ # subsection heading
cmd_SH_SS("H2")
}
# Emit a section (tag "H1") or subsection (tag "H2") heading, with a
# NAME anchor, into the body, and a matching linked entry into the
# table of contents.  The heading text is $0 from its 5th character on,
# with an optional leading and trailing double quote removed.
# Globals updated: H1, H2, SH_SS_count, In_TOC_Item.
function cmd_SH_SS(tag, s)
{ # [sub]section heading
if (!TH_seen) # should not happen, but some man pages are
cmd_TH(substr($0,5)) # irregular
end_font()
# a new heading ends all open indents and lists
while (RSE_Level > 0)
cmd_RE()
while (List_Level > 0)
end_TP()
if (tag == "H1")
{
H1++
if (H1 == 1) # first section: start the table of contents
begin_toc()
if (H2 > 0) # previous section had subsections: close their TOC sub-list
{
print_toc("</LI>")
print_toc("</UL>")
}
H2 = 0
if (H1 > 1)
print_line("<HR>") # a separating horizontal rule is a nice touch
}
else if (tag == "H2")
{
H2++
}
s = substr($0,5)
sub(/^ *\"/,"",s)
sub(/\" *$/,"",s)
s = strtohtml(s)
# anchor label suffix: ".<section>" or ".<section>.<subsection>"
SH_SS_count = "." H1
if (H2 > 0)
SH_SS_count = SH_SS_count "." H2
print_line("<" tag ">")
print_line("<A NAME=\"HDR" SH_SS_count "\">")
print_line(s)
print_line("</A>")
print_line("</" tag ">")
# close the previous TOC item, except when the first subsection is
# about to be nested inside it
if (In_TOC_Item && (H2 != 1))
print_toc("</LI>")
if (H2 == 1) # first subsection: open the nested TOC list
print_toc("<UL>")
In_TOC_Item = 1
print_toc("<LI>")
print_toc("<A HREF=\"#HDR" SH_SS_count "\">")
print_toc(s)
print_toc("</A>")
}
# Handle the .sp request by preserving the input line as an HTML comment.
function cmd_sp()
{ # .sp nnn: vertical space
cmd_comment($0) # no sensible HTML equivalent
}
function cmd_TH(    line)
{
    # .TH: emit the document prologue (<HTML>, <HEAD> with <TITLE> and
    # a "made" link, and the opening <BODY>), and record in TH_seen that
    # this has been done.
    #
    # The title is the current line ($0) from its 4th character on,
    # after joining any backslash-continued lines.  NB: line is a local
    # variable; cmd_SH_SS() passes an argument for it, but the value is
    # always replaced by $0.
    end_font()
    print_line("<HTML>")
    print_line("<HEAD>")
    print_line("<TITLE>")
    line = $0
    while (line ~ /\\$/)
    {
        line = substr(line,1,length(line)-1) # drop the trailing backslash
        # At end of input getline leaves $0 unchanged; appending it
        # again would re-create the trailing backslash and loop forever.
        if ((getline) <= 0)
            break
        line = line $0
    }
    print_line(strtohtml(substr(line,4)))
    print_line("</TITLE>")
    print_line("<LINK REV=\"made\" HREF=\"mailto:" LOGNAME "@" HOSTNAME "\">")
    print_line("</HEAD>")
    print_line("")
    print_line("<BODY>")
    print_line("")
    TH_seen = 1
}
# Handle .TP: read the following input line as the item label and emit
# it as an item of a list at the current List_Level, opening a new list
# first when the current level has no items yet.  The list is a <UL>
# when the label of its first item is exactly "\(bu", and a <DL>
# otherwise.
# Globals updated: List_Level, Item_Count[], List_Name[], List_Item[].
function cmd_TP()
{
end_font()
getline # this is the item label, usually "\(bu" or ".B ..."
if (Item_Count[List_Level] == 0) # then first item of new list
{
List_Level++
Item_Count[List_Level] = 0
if ($0 == "\\(bu")
{
List_Name[List_Level] = "UL"
List_Item[List_Level] = "LI"
}
else
{
List_Name[List_Level] = "DL"
List_Item[List_Level] = "DT"
}
# always true here, since the count was just reset above
if (Item_Count[List_Level] == 0)
print_line("<" List_Name[List_Level] ">")
}
Item_Count[List_Level]++
if (List_Name[List_Level] == "DL")
{
if (Item_Count[List_Level] > 1) # close the previous item's description
print_line("</DD>")
print_line("<DT>")
# the label may itself be a font request: dispatch to its handler
if ($0 ~ /^[.]B /)
cmd_B()
else if ($0 ~ /^[.]I /)
cmd_I()
else if ($0 ~ /^[.]R /)
cmd_R()
else if ($0 ~ /^[.]BR/)
cmd_XY("B","R")
else if ($0 ~ /^[.]BI/)
cmd_XY("B","I")
else if ($0 ~ /^[.]IB/)
cmd_XY("I","B")
else if ($0 ~ /^[.]IR/)
cmd_XY("I","R")
else if ($0 ~ /^[.]RB/)
cmd_XY("R","B")
else if ($0 ~ /^[.]RI/)
cmd_XY("R","I")
else
print_line(strtohtml($0))
end_font()
if (In_PRE) # should not happen, but some man pages
cmd_fi() # are irregular
print_line("</DT>")
print_line("<DD>")
}
else # must be <UL> <LI> ... </LI> </UL> type list
{
# the label line itself (the bullet) is not output for this list type
if (Item_Count[List_Level] > 1)
print_line("</LI>")
print_line("<LI>")
}
}
function cmd_TS(    tbl_nroff_cmd)
{
    # .TS ... .TE: tables are not translated to HTML.  Instead the table
    # source is saved in TBLFILE, formatted externally, and the
    # formatted text is included as a preformatted environment.

    # Copy the table to a temporary file: the current (.TS) line, then
    # every following line up to and including the .TE line.
    do
    {
        print $0 >TBLFILE
        if ($0 ~ /^[.]TE/) # then end of table found
            break
    } while ((getline) > 0)
    close (TBLFILE)

    # Run tbl, nroff, and col to convert the table to formatted text
    tbl_nroff_cmd = "tbl " TBLFILE " | nroff -man | col -b"
    print_line("<PRE>")
    while ((tbl_nroff_cmd | getline) > 0)
        print_line(strtohtml($0))
    print_line("</PRE>")
    close (tbl_nroff_cmd)
    delete_file(TBLFILE)
}
# cmd_unknown: fallback for a request that no other handler recognizes.
# Closes any open font markup, issues a warning quoting the current
# line, and passes that line to cmd_comment().
function cmd_unknown()
{
    end_font()
    warning("Unrecognized nroff/troff command in [" $0 "] changed to comment")
    cmd_comment($0)
}
# cmd_XY: handle the alternating-font requests (.BR, .BI, .IB, ...).
#
#   x   font letter applied to the even-numbered fields ($2, $4, ...)
#   y   font letter applied to the odd-numbered fields  ($3, $5, ...)
#
# Each argument of the current line is written to TMPFILE wrapped in
# the HTML tag that Font_Map assigns to its font, with no separator
# between arguments; a newline is written after the last one.
# Spaces inside quoted arguments are protected before field splitting
# and restored on output.
#
# font and k are local variables.
function cmd_XY(x,y,    font,k)
{
    end_font()
    protect_quoted_args()

    k = 2                       # $1 is the request name itself
    while (k <= NF)
    {
        if (k % 2)
            font = Font_Map[y]
        else
            font = Font_Map[x]
        printf("%s%s%s", html_font_begintag(font), strtohtml(unprotect_quoted_arg($k)), html_font_endtag(font)) > TMPFILE
        ++k
    }
    print_line("")              # terminate the output line
}
# define: record a troff string definition found on the current line.
#
#   name    the string name as it appears in the .ds request
#
# Typical input lines:
#       .if n .ds Bi BibTeX
#       .el .ds Bi BibTeX
#
# The string is referenced in text as \*(Bi, so the Macro[] key is a
# regular expression matching that reference.  The stored value is the
# remainder of the line starting three characters after the first
# occurrence of name in $0.
# NOTE(review): the fixed offset of 3 presumably assumes a
# two-character name followed by a single separator -- confirm.
#
# regexp is a local variable.
function define(name,    regexp)
{
    regexp = "\\\\\\*\\(" name
    Macro[regexp] = substr($0, index($0, name) + 3)
}
# delete_file: remove the file named s by running "/bin/rm -f".
# The name is passed to the shell unquoted.
function delete_file(s)
{
    system(("/bin/rm -f " s))
}
# end_font: emit the closing tag for every font still recorded in
# HTML_Font_Name[], innermost first, leaving Font_Level at zero.
function end_font()
{
    while (Font_Level > 0)
    {
        print_line(html_font_endtag(HTML_Font_Name[Font_Level]))
        Font_Level--
    }
}
# end_toc: finish the table of contents by writing the closing
# </LI> and </UL> tags plus a horizontal rule to TOCFILE, then
# close TOCFILE.
function end_toc()
{
    print_toc("</LI>")
    print_toc("</UL>")
    print_toc("<HR>")

    close (TOCFILE)
}
# end_TP: close the list opened by cmd_TP() at the current level.
#
# If the list has at least one item, the last item and the list are
# closed with tags matching the list type (<DL> or <UL>).  The item
# count for the level is then reset and the nesting level is
# decremented, but never below zero.
function end_TP()
{
    if (Item_Count[List_Level] > 0)
    {
        # close the last open item, then the list itself
        print_line((List_Name[List_Level] == "DL") ? "</DD>" : "</LI>")
        print_line((List_Name[List_Level] == "DL") ? "</DL>" : "</UL>")
    }

    Item_Count[List_Level] = 0
    if (List_Level > 0)
        --List_Level
}
# font_sub: replace troff font escapes (\fB, \fC, \fI, \fP, \fR, \fS,
# \fT) in s with HTML tags and return the result.
#
# Font state is kept in the globals Font_Level and HTML_Font_Name[]:
#   * \fP closes the font recorded at the current level and pops one
#     level (the level never drops below zero);
#   * any other font letter opens the tag that Font_Map assigns to it.
#     If a font was already open, its closing tag is emitted first and
#     the new font takes over the old one's slot, so that text written
#     as ...\fB...\fR... nests correctly.
#
# tag is a local variable.
function font_sub(s,    tag)
{
    while (match(s, /\\f[BCIPRST]/))
    {
        if (substr(s, RSTART+2, 1) != "P")      # set explicit font
        {
            Font_Level++
            HTML_Font_Name[Font_Level] = Font_Map[substr(s, RSTART+2, 1)]
            tag = html_font_begintag(HTML_Font_Name[Font_Level])

            # Handle ...\fB...\fR... style by ending the previous font
            if (Font_Level > 1)
            {
                tag = html_font_endtag(HTML_Font_Name[Font_Level-1]) tag
                HTML_Font_Name[Font_Level-1] = HTML_Font_Name[Font_Level]
                Font_Level--
            }
        }
        else                                    # revert to previous font
        {
            tag = html_font_endtag(HTML_Font_Name[Font_Level])
            if (Font_Level > 0)
                Font_Level--
        }

        # splice the tag in place of the three-character escape
        s = substr(s, 1, RSTART-1) tag substr(s, RSTART+3)
    }
    return s
}
# html_font_begintag: return the opening tag "<name>" for the HTML
# element called name, or the empty string when name is empty.
function html_font_begintag(name)
{
    return (name == "") ? "" : ("<" name ">")
}
# html_font_endtag: return the closing tag "</name>" for the HTML
# element called name, or the empty string when name is empty.
function html_font_endtag(name)
{
    return (name == "") ? "" : ("</" name ">")
}
# initialize: set up all global state and emit the HTML prologue.
#
# Sets the version strings; obtains LOGNAME, HOSTNAME and DATE from
# external commands; validates the requested HTML level (defaulting to
# 2); fills the Font_Map[] and Macro[] tables; names the temporary
# files; defines the URL and e-mail patterns later handed to anchor();
# and finally calls print_header().
#
# Changes from the previous revision:
#   * the "No conversion implemented" warning now concatenates the HTML
#     level into the message; it used to pass it as a second argument
#     to the one-parameter warning() function, which dropped the value
#     and is a call with more arguments than declared;
#   * the three command pipes used with getline are closed after use.
function initialize()
{
    # Change these two lines whenever the program is modified
    VERSION_NUMBER = "1.06"
    VERSION_DATE = "[24-Oct-1997]"
    VERSION = "Version " VERSION_NUMBER " " VERSION_DATE

    # Each command is read once; close the pipe so that it does not
    # stay open for the rest of the run.
    "echo $LOGNAME" | getline LOGNAME
    close ("echo $LOGNAME")
    "hostname" | getline HOSTNAME
    close ("hostname")
    "date" | getline DATE
    close ("date")

    if (HTML == "")
        HTML = 2
    if ((HTML != 2) && (HTML != 3) && (HTML != "3.2"))
    {
        warning("Unsupported HTML level " HTML " requested: defaulting to HTML level 2")
        HTML = 2
    }

    # troff font letter -> HTML element name ("" means no markup)
    Font_Map["B"] = "STRONG"
    Font_Map["C"] = "TT"
    Font_Map["I"] = "EM"
    Font_Map["R"] = ""
    Font_Map["S"] = ""          # cannot map symbol font yet
    Font_Map["T"] = "TT"

    # Macro[] keys are regular expressions; the values are gsub()
    # replacement strings, so a literal "&" must be written as "\\&".
    Macro["\\\\e"] = "\\"
    if (HTML == 2)
        Macro["\\\\0"] = "\\&#160;"     # change non-breakable space to numeric entity
    else if (HTML >= 3)
        Macro["\\\\0"] = "\\&nbsp;"     # can finally use named entity
    else
        warning("No conversion implemented for \\\\0 (non-breakable space) in HTML level " HTML)

    # NOTE(review): these fixed names in /tmp are shared by every run
    # of the program, so concurrent runs would overwrite each other's
    # files -- consider unique names.
    TOCFILE = "/tmp/man2html.toc"
    TBLFILE = "/tmp/man2html.tbl"
    TMPFILE = "/tmp/man2html.tmp"

    H1 = 0
    H2 = 0

    Macro["\\\\\\(bu"] = "\\&#164;"
    Macro["\\\\\\(em"] = "---"
    Macro["\\\\\\(en"] = "--"

    # The following fragment for setting URL_xxx variables
    # is borrowed intact from my bibtex-to-html.awk file:
    #
    # According to Internet RFC 1614 (May 1994), a URL is
    # defined in the document T. Berners-Lee, ``Uniform
    # Resource Locators'', March 1993, available at URL
    # ftp://info.cern.ch/pub/ietf/url4.ps. Unfortunately,
    # that address is no longer valid. However, I was able to
    # track down pointers from http://www.w3.org/ to locate a
    # suitable description in Internet RFC 1630 (June 1994).
    # NB: We additionally disallow & in a URL because it is
    # needed in SGML entities "&name;". We also disallow =
    # and | because these are commonly used in \path=...= and
    # \path|...| strings in BibTeX files. These restrictions
    # could be removed if we went to the trouble of first
    # encoding these special characters in %xy hexadecimal
    # format, but they are rare enough that I am not going to
    # do so for now. The worst that will happen from this
    # decision is that an occasional URL in a BibTeX file will
    # be missing a surrounding anchor.
    # Bug fix [24-Oct-1997]: Add < and > to the set of excluded
    # characters, to avoid incorrectly including SGML markup inside a
    # URL. Before this fix, "\fChttp://www/\fP" got translated
    # incorrectly to
    # <TT><A HREF="http://www/</TT>">http://www/</TT></A>
    # instead of the correct
    # <TT><A HREF="http://www">http://www</A></TT>
    URL_PATTERN = "[A-Za-z]+://[^ \",&=|<>]+"
    URL_OFFSET = 0
    URL_PREFIX = ""
    URL_SAVE_LABEL = 0

    E_MAIL_PATTERN = "[A-Za-z0-9_-]+@[A-Za-z0-9-]+([.][A-Za-z0-9-]+)*"
    E_MAIL_OFFSET = 0
    E_MAIL_PREFIX = "mailto:"
    E_MAIL_SAVE_LABEL = 0

    print_header()
}
# print_header: write the leading "do not edit" comments, which record
# the program version, the date, the input file name and the host,
# followed by the <!DOCTYPE> declaration for the selected HTML level.
# No DOCTYPE line is written for any other value of HTML.
function print_header()
{
    print_line("<!-- Warning: Do NOT edit this file. -->")
    print_line("<!-- It was created automatically by man2html.awk " VERSION " on " DATE " -->")
    print_line("<!-- from the file " strtohtml(FILENAME) " at " HOSTNAME " -->")
    print_line("")

    if (HTML == 2)
    {
        print_line("<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">")
    }
    else if (HTML == 3)
    {
        # We need level 3 HTML only because of our use of &nbsp; and &shy;
        print_line("<!DOCTYPE HTML public \"-//IETF//DTD HTML 3.0//EN\">")
    }
    else if (HTML == "3.2")
    {
        # HTML 3.2 released 5-Nov-1996 at http://www.w3.org/pub/WWW
        print_line("<!DOCTYPE HTML public \"-//W3C//DTD HTML 3.2//EN\">")
    }
}
# print_line: append s as one output record to TMPFILE, the file in
# which the HTML document is accumulated.
function print_line(s)
{
    print (s) > TMPFILE
}
# print_toc: append s as one output record to TOCFILE, the file in
# which the table of contents is accumulated.
function print_toc(s)
{
    print (s) > TOCFILE
}
# protect_quoted_args: make each double-quoted argument on the current
# line survive field splitting as a single field.
#
# Every space that lies between a pair of double quotes in $0 is
# replaced by the character \177 (DEL), and $0 is reassigned so that
# the fields are split again.  The quotes themselves are kept;
# unprotect_quoted_arg() undoes the change for one field.
# A line that contains no double quote is left untouched.
#
# inside, k and s are local variables.
function protect_quoted_args(    inside,k,s)
{
    if (!index($0, "\""))
        return                  # nothing quoted on this line

    s = $0
    inside = 0                  # nonzero while between a pair of quotes
    k = 1
    while (k <= length(s))
    {
        if (substr(s, k, 1) == "\"")
            inside = !inside
        else if (inside && (substr(s, k, 1) == " "))
            s = substr(s, 1, k-1) "\177" substr(s, k+1)
        ++k
    }
    $0 = s
}
# strtohtml: convert one string of troff text to HTML and return it.
#
#   s       the text to convert (modified only as a local copy)
#   name    local variable: loop index over the keys of Macro[]
#
# The substitutions are applied in a fixed order:
#   1. troff escapes that simply disappear or become plain ASCII;
#   2. the SGML special characters &, <, and > become entities (this
#      happens before any entity is inserted by the later steps);
#   3. "\ " and "\%" are handled according to the HTML level;
#   4. inside a comment (In_Comment set) "--" pairs are hidden;
#      otherwise double quotes become an entity;
#   5. every key of Macro[] is applied as a regular expression;
#   6. font escapes are converted by font_sub();
#   7. doubled backslashes are reduced to one;
#   8. outside comments, the result is passed through anchor() twice,
#      with the URL and the e-mail patterns (anchor() is defined
#      elsewhere in this file; presumably it wraps matches in links).
function strtohtml(s, name)
{
gsub(/\\$/,"",s) # discard backslash-newline
gsub(/\\-/,"-",s) # show troff minus as ASCII minus
gsub(/\\[&]/,"",s) # remove no-op macros
# gsub(/\\[|]/," ",s) # change thin space to space
gsub(/\\[|]/,"",s) # delete thin space (nroff does too)
# In a gsub() replacement string an unescaped "&" stands for the
# matched text, hence the "\\&" in every replacement below.
gsub(/[&]/,"\\&amp;",s) # protect 3 or 4
gsub(/</,"\\&lt;",s) # special SGML
gsub(/>/,"\\&gt;",s) # characters
if (HTML == 2)
{
gsub(/\\ /,"\\&#160;",s)# represent literal space by numeric entity
gsub(/\\%/,"",s) # squeeze out discretionary hyphens
}
else if (HTML >= 3)
{
gsub(/\\ /,"\\&nbsp;",s) # preserve literal spaces
# NB: several browsers fail to implement soft hyphen properly: they show
# it as an explicit hyphen when the word is not broken at end of line,
# instead of discarding it. We translate it correctly, and hope that
# broken browsers eventually get fixed, sigh...
gsub(/\\%/,"\\&shy;",s) # discretionary hyphen -> soft hyphen
}
# Note that the quote conversions below are skipped inside a comment:
# the three branches are mutually exclusive.
if (In_Comment)
gsub(/--/,"__",s) # must hide -- pairs to avoid grammar error
else if (HTML == "3.2")
gsub(/\"/,"\\&#34;",s) # &quot; was left out of HTML 3.2, sigh...
else
gsub(/\"/,"\\&quot;",s) # but other versions, and SGML, have &quot;
# It is curious that browsers can display a bullet, but there is no
# HTML markup to represent it, and it is absent from the standard
# ISO8859-1 fonts
# gsub(/\\\(bu/,"\\&#164;",s) # change bullets to general currency sign
# &curren; but use numeric code because
# xmosaic does not recognize it
# Each Macro[] key is used as a regular expression and its value as
# the gsub() replacement; the order of a for-in traversal is not
# defined by awk.
for (name in Macro) # substitute macro names
gsub(name,Macro[name],s)
# font_sub() updates the global font state as a side effect
s = font_sub(s)
gsub(/\\\\/,"\\",s) # reduce troff doubled backslash to single HTML one
# if (index(s,"\\") > 0) # check for anything we missed
# warning("Possible unrecognized nroff/troff markup in [" s "]")
if (!In_Comment) # no link inside comment; otherwise, browser shows text
{
s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
s = anchor(s,"HREF",E_MAIL_PATTERN,E_MAIL_OFFSET,E_MAIL_PREFIX, \
E_MAIL_SAVE_LABEL)
}
return (s)
}
# terminate: finish the document and write it to standard output.
#
# The HTML body has been accumulated in TMPFILE and the table of
# contents in TOCFILE.  After closing both, the output is assembled as
#       everything in TMPFILE before the first "<H1>" line,
#       the whole of TOCFILE,
#       the "<H1>" line and the rest of TMPFILE,
# and both temporary files are removed.
#
# Changes from the previous revision:
#   * If TMPFILE contains no "<H1>" line, the first loop ends at end
#     of file with x still holding the last line read; that line used
#     to be printed a second time after the table of contents.  The
#     remainder is now printed only when the "<H1>" line was found.
#   * The getline redirections are parenthesized, as in cmd_TS(), so
#     that "< FILE > 0" cannot be parsed as reading from the file
#     named by the expression (FILE > 0).
#
# NOTE(review): when no "<H1>" line exists, the table of contents is
# still written after the complete body, as before.
#
# x, y and found are local variables.
function terminate(    x,y,found)
{
    print_line("</BODY>")
    print_line("</HTML>")
    close (TMPFILE)
    end_toc()

    # Copy the part of the body that precedes the first major heading
    found = 0
    while ((getline x < TMPFILE) > 0)
    {
        if (x == "<H1>")
        {
            found = 1
            break
        }
        print x
    }

    # Insert the table of contents
    while ((getline y < TOCFILE) > 0)
        print y
    close (TOCFILE)
    delete_file(TOCFILE)

    # Copy the held "<H1>" line and the rest of the body
    if (found)
    {
        print x
        while ((getline x < TMPFILE) > 0)
            print x
    }
    close (TMPFILE)
    delete_file(TMPFILE)
}
# unprotect_quoted_arg: undo protect_quoted_args() for one field.
# Returns s with every \177 (DEL) character turned back into a space
# and with one leading and one trailing double quote removed, if
# present.
function unprotect_quoted_arg(s)
{
    gsub(/\177/, " ", s)        # restore spaces
    sub(/^"/, "", s)            # drop the opening quote
    sub(/"$/, "", s)            # drop the closing quote
    return s
}
# warning: write a diagnostic to /dev/stderr in the form
#       FILENAME:FNR:%%message
# using the name and record number of the current input file.
function warning(message)
{
    print (FILENAME ":" FNR ":%%" message) > "/dev/stderr"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment