Skip to content

Instantly share code, notes, and snippets.

@cemeyer
Created October 17, 2015 23:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cemeyer/d31af5bbe9490c6a275c to your computer and use it in GitHub Desktop.
Save cemeyer/d31af5bbe9490c6a275c to your computer and use it in GitHub Desktop.
Convert the raw output of man(1) to HTML-ish
#!/usr/bin/env perl
# Extracted from 2-clause BSDL https://www.freebsd.org/cgi/man.cgi/source
#
# Usage example:
# $ man 3 tree | path/to/man2html.pl > tree.3.html
# Encode one chunk of raw man(1) output as HTML.
#
# The characters &, < and > are replaced by entities, and nroff-style
# overstrike sequences (^H is the backspace character, \010) are turned
# into markup:
#
#   _^Hc        underlined c       -> <i>c</i>
#   c^Hc        bold c             -> <b>c</b>
#   _^Hc^Hc     bold + underlined  -> <i><b>c</b></i>
#   c^Hc^Hc^Hc  bold struck three or more times -> <b>c</b>
#
# Consecutive cells of the same kind share one pair of tags.  Any other
# "x^H" pair that is left over is treated as a plain erase.
#
# Takes the text as its only argument and returns the encoded copy; the
# argument itself is not modified.
sub encode_data {
    my ($text) = @_;

    # Drop every "x^H" pair from a run of overstruck cells, which keeps the
    # character printed last in each cell.
    my $strip_overstrike = sub {
        my ($run) = @_;
        $run =~ s/.\010//g;
        return $run;
    };

    # A character struck three or more times (c^Hc^Hc...) is reduced to the
    # ordinary c^Hc pair.  Left alone, the extra "^Hc" would later pair up
    # with the ">" of a freshly inserted tag and corrupt it.
    $text =~ s{([^\010])(?:\010\1){2,}}{$1\010$1}g;

    # Overstruck &, < and > lose their formatting so that the entity
    # produced below stays in one piece.  Keep the special character itself
    # (the one printed last), not whatever was underneath it: "_^H<" is "<".
    $text =~ s{[^\010]\010([><&])}{$1}g;
    $text =~ s{\010[><&]}{}g;

    # Escape &, < and >.  The ampersand has to go first.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    # Bold + underlined cells must be handled before the plain italic and
    # bold passes, which would otherwise each claim half of the cell.  The
    # italic tag is the outer one so the result never looks like a line
    # made purely of <b>...</b> runs.
    $text =~ s{((?:_\010([^_\010])\010\2)+)}
              {'<i><b>' . $strip_overstrike->("$1") . '</b></i>'}ge;

    # Underlined runs, then bold runs.
    $text =~ s{((?:_\010[^_])+)}{'<i>' . $strip_overstrike->("$1") . '</i>'}ge;
    $text =~ s{((?:.\010.)+)}{'<b>' . $strip_overstrike->("$1") . '</b>'}ge;

    # Whatever overstrike is left is a plain erase.
    $text =~ s/.\010//g;

    return $text;
}
# Percent-encode a string for use as a query-string value.
#
# Escaped as %xx (lower-case hex): all control characters, the characters
# RFC 1738 lists as possibly reserved (; / ? : @ & =), the escape character
# %, the quotes ' " `, < and >, the fragment delimiter #, a literal +, DEL
# and every byte from 0x80 to 0xff.  A space is written as "+".
#
# Takes the string as its only argument and returns the encoded copy; the
# argument itself is not modified.  An undefined argument yields the empty
# string.
sub encode_url {
    my ($text) = @_;
    return '' unless defined $text;

    # A literal "+" must be escaped because "+" is how a space is written
    # below; left alone it would decode back as a space.  Note the range is
    # octal: \037 (31) is the last control character.
    $text =~ s{([\000-\037;/?:\@&=%'"`<>#+\177-\377])}{sprintf('%%%02x', ord($1))}ge;

    # Spaces were deliberately left out of the class above.
    $text =~ tr/ /+/;

    return $text;
}
# Turn a "name(section)" manual page reference into a hyperlink.
#
# $matched is the reference exactly as it was matched in the line; it may
# contain white space and <i>/<b> tags.  It is used unchanged as the link
# text, while a cleaned-up copy supplies the query parameters.  The link
# target is built from the globals $BASE and $manpath.
sub mlnk {
    my ($matched) = @_;

    # Cleaned-up copy: no white space, no <i>/<b> markup.
    my $reference = $matched;
    $reference =~ s/[\s]+//g;
    $reference =~ s/<\/?[IB]>//ig;

    # Split "name(section)" into its two halves and encode each for the URL.
    my ( $page, $sektion ) = ( $reference =~ m/^([^\(]*)\((.*)\)/ );
    $page    = encode_url($page);
    $sektion = encode_url($sektion);
    my $path = encode_url($manpath);

    return
qq{<a href="$BASE?query=$page&amp;sektion=$sektion&amp;apropos=0&amp;manpath=$path">$matched</a>};
}
# Main program: read man(1) output from the files named on the command line
# (or standard input) and print it as an HTML page.

my $prev_blank = 1;    # previous line was blank; starts true so that
                       # leading blank lines are dropped
my @sections;          # [ anchor, title ] of every section heading seen

print "<html>\n<body>\n";
print "<pre>\n";

while ( my $line = <> ) {

    # Squeeze each run of blank lines down to a single one.
    if ( $line =~ /^\s+$/ ) {
        next if $prev_blank;
        $prev_blank = 1;
    }
    else {
        $prev_blank = 0;
    }

    $line = encode_data($line);

    # Link "name(section)" references to other manual pages.  Only lines
    # that start with white space are scanned, which skips headers.
    if ( $line =~ /^\s/ ) {
        $line =~ s{((<[IB]>)?[\w\_\.\-]+\s*(</[IB]>)?\s*\(([1-9ln][a-zA-Z]*)\))}
                  {mlnk($1)}gie;
    }

    # Detect e-mail addresses in manpages.
    # NOTE(review): $enable_mailto_links is not set anywhere in the visible
    # file, so this stays off unless it is set elsewhere.
    if ( $enable_mailto_links && $line =~ /\@/ ) {
        $line =~ s{([a-z0-9_\-\.]+\@[a-z0-9\-\.]+\.[a-z]+)}
                  {<a href="mailto:$1">$1</A>}gi;
    }

    # Detect URLs in manpages, https included.  The line is already HTML
    # encoded at this point, so a URL written as <http://...> is followed by
    # "&gt;"; stop in front of that entity (and in front of a double quote)
    # instead of swallowing it into the link.
    $line =~ s{((?:ftp|https?)://(?:(?!&[lg]t;)[^\s<>\)"])+)}
              {<a href="$1">$1</a>}gi;

    # A line that consists of nothing but bold text is a section heading:
    # give it an anchor and remember it for the index printed at the end.
    # The outer group captures the whole line, not just the last bold run.
    if ( $line =~ m{^((?:<b>.*?</b>)+)\n?$} ) {
        ( my $title = $1 ) =~ s{</?b>}{}g;
        ( my $anchor = encode_url($title) ) =~ tr/+/_/;
        $line = qq{<a name="$anchor" href="#end"><b>$title</b></a>\n};
        push @sections, [ $anchor, $title ];
    }

    print $line;
}

print "</pre>\n";

# Every section heading links to "#end", so that anchor has to exist.  It is
# followed by an index that leads back to the individual sections.
print qq{<a name="end"></a>\n};
if (@sections) {
    print "<hr>\n";
    print join( " |\n", map { qq{<a href="#$_->[0]">$_->[1]</a>} } @sections ),
        "\n";
}

print "</body>\n</html>\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment