Last active
August 29, 2015 14:22
-
-
Save lindleyw/dcafb39f7e7cb8a72198 to your computer and use it in GitHub Desktop.
An example of generating an index, for a complete Perl susbystem like the Mojolicious framework, from its collection of *.pm and *.pod files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/env/perl | |
use Pod::Simple::SimpleTree; | |
use warnings; | |
use strict; | |
use v5.20; | |
use feature 'signatures'; | |
no warnings 'experimental::signatures'; | |
use Data::Dumper; | |
# Compare, | |
# CPAN::IndexPod | |
# Pod::Index | |
# | |
# Probably ought to regard X<…> entities although metacpan doesn't create <a> for them. | |
# | |
# Original one-liner (sh) | |
# grep -r head2 . | perl -n -e 'chomp; s/:=head./:/; s{\./}{}; my($fil,$head)=split /:/;$href=$head;$href=~s/^\s//;$href=~s/\s/-/g;$fil=~s{\..+}{}g;$fil=~s{/}{::}g;print "$head: <a href=https://metacpan.org/pod/$fil#$href>$fil</a><br>\n"' | sort | uniq >/tmp/f.html | |
# print Dumper($pod->root); | |
# Evil global variables | |
my $current_file; | |
my $current_heading; | |
sub plaintext_of (@subnodes) { | |
my $accreted_text = ''; | |
foreach my $n (@subnodes) { | |
if (!ref $n) { # string | |
$accreted_text .= $n; | |
} else { # array ref, presumably | |
my @subs = (@{$n}); | |
shift @subs; shift @subs; | |
$accreted_text .= plaintext_of(@subs); | |
} | |
} | |
return $accreted_text; | |
} | |
sub strip_extension ($filename) { | |
$filename =~ s/\.\w+$//; # remove extension | |
return $filename; | |
} | |
# sub convert_to_ | |
sub convert_to_href_text ($human_text) { | |
$human_text =~ s/(\s|\(|=|\[)/-/g; | |
$human_text =~ s/([^a-zA-Z0-9_\-*:])//g; | |
return $human_text; | |
} | |
sub convert_local_path_to_href ($file) { | |
$file =~ s{\..+}{}g; | |
$file =~ s{/}{::}g; | |
return $file; | |
} | |
# More evil globals. | |
# Maps local filename to manpage name. Based on the =head1 NAME value. | |
my %file_manpage; | |
# All defined references. | |
my %references; | |
sub save_definition ($in_file, $refname, $display_name = $refname) { | |
$references{$display_name}{strip_extension($in_file)} = | |
convert_to_href_text($refname); | |
} | |
sub save_file_manpage ($filename, $manpage) { | |
$file_manpage{strip_extension($current_file)} = $manpage; | |
} | |
my $save_next_text_as_module_name = 0; | |
sub parse_entity ($element, $attrs, @subnodes) { | |
if ($element =~ /^head(\d)/) { | |
my $level = $1; | |
$current_heading = plaintext_of(@subnodes); | |
if ($level == 1 && (lc($current_heading) eq 'name')) { | |
$save_next_text_as_module_name = 1; | |
} elsif ($level == 2) { | |
save_definition ( $current_file, $current_heading ); | |
} | |
} elsif ($save_next_text_as_module_name && $element =~ /^para$/i ) { | |
$save_next_text_as_module_name = 0; | |
$current_heading = plaintext_of(@subnodes); | |
$current_heading =~ m/^\s*(\S+)/; | |
my $firstword = $1; | |
save_file_manpage($current_file, $firstword); | |
} | |
} | |
sub parse_document ($, $, @entities) { | |
$current_heading = undef; | |
$save_next_text_as_module_name = 0; | |
foreach my $ent (@entities) { | |
parse_entity (@{$ent}); | |
} | |
} | |
# Accept a list of module names. e.g., "Mojo*" | |
# From this get a list of filenames in the Perl directory | |
# Create a hash whose keys are all the (*.pm, *.pod) files under those pathnames. | |
# Give POD files preference: remove any x/y.pm where x/y.pod exists. | |
use File::Find::Rule; | |
use Mojo::Path; | |
# This expects the user to specify one or more directory paths. | |
# We will index all the *.pm and *.pod therein. | |
my @file_list; | |
foreach my $index_it (@ARGV) { | |
my $index_path = Mojo::Path->new($index_it); | |
my $index_dir = $index_path->to_dir; | |
my @files = File::Find::Rule->file() | |
->name( '*.pm', '*.pod' ) | |
->in( $index_dir ); | |
push @file_list, @files; | |
} | |
foreach my $parse_file (@file_list) { | |
$current_file = $parse_file; | |
my $pod = Pod::Simple::SimpleTree->new->parse_file($parse_file); | |
parse_document(@{$pod->root}); | |
} | |
sub clean_heading ($original) { | |
# Clean headings for index display | |
$original =~ s/\smean\?$//; | |
if ($original =~ m/^((?:(?:who|what|when|where|why|how|is|are|a|an|do|does|don't|doesn't|not|I|need|to|about|did|my|the)\s+|error\s+"|message\s+")+)(.*)$/i) { | |
my ($prefix, $main) = ($1, ucfirst($2)); | |
$main =~ s/[?"]//g; | |
# $prefix =~ s/[?"]//g; | |
return $main; | |
} | |
if ($original =~ m/^(\w+ing)\s+(a|an|the|some|any|all|to|from|your)?\b(.*)$/) { | |
my ($verb, $qualifier, $remainder) = ($1, $2, $3); | |
$qualifier ||= ''; | |
# print ucfirst("$remainder, $verb $qualifier\n"); | |
return ucfirst("$remainder, $verb $qualifier"); | |
} | |
return $original; | |
} | |
foreach my $r (sort {fc($a) cmp fc($b)} keys %references) { | |
my $new = clean_heading($r); | |
if ($new ne $r) { | |
foreach my $orig_file (keys %{$references{$r}}) { | |
save_definition( $orig_file, $r, $new); | |
} | |
} | |
} | |
print "<html><head><title>Cross-reference</title></head><body>\n"; | |
print "<dl>\n"; | |
foreach my $r (sort {fc($a) cmp fc($b)} keys %references) { | |
print " <dt>$r</dt>\n<dd>"; | |
my @sources; | |
foreach my $orig_file (sort {fc($a) cmp fc($b)} keys %{$references{$r}}) { | |
my $manpage = $file_manpage{$orig_file}; | |
# my $href = | |
push @sources, qq(<a href="https://metacpan.org/pod/${manpage}#$references{$r}{$orig_file}">$manpage</a>); | |
} | |
print join(', ', @sources); | |
print "</dd>\n"; | |
} | |
print "</dl>\n"; | |
print "</body></html>\n"; | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment