Skip to content

Instantly share code, notes, and snippets.

@lindleyw
Last active August 29, 2015 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lindleyw/dcafb39f7e7cb8a72198 to your computer and use it in GitHub Desktop.
Save lindleyw/dcafb39f7e7cb8a72198 to your computer and use it in GitHub Desktop.
An example of generating an index, for a complete Perl susbystem like the Mojolicious framework, from its collection of *.pm and *.pod files
#!/usr/env/perl
use Pod::Simple::SimpleTree;
use warnings;
use strict;
use v5.20;
use feature 'signatures';
no warnings 'experimental::signatures';
use Data::Dumper;
# Compare,
# CPAN::IndexPod
# Pod::Index
#
# Probably ought to regard X<…> entities although metacpan doesn't create <a> for them.
#
# Original one-liner (sh)
# grep -r head2 . | perl -n -e 'chomp; s/:=head./:/; s{\./}{}; my($fil,$head)=split /:/;$href=$head;$href=~s/^\s//;$href=~s/\s/-/g;$fil=~s{\..+}{}g;$fil=~s{/}{::}g;print "$head: <a href=https://metacpan.org/pod/$fil#$href>$fil</a><br>\n"' | sort | uniq >/tmp/f.html
# print Dumper($pod->root);
# Evil global variables
my $current_file;
my $current_heading;
sub plaintext_of (@subnodes) {
my $accreted_text = '';
foreach my $n (@subnodes) {
if (!ref $n) { # string
$accreted_text .= $n;
} else { # array ref, presumably
my @subs = (@{$n});
shift @subs; shift @subs;
$accreted_text .= plaintext_of(@subs);
}
}
return $accreted_text;
}
sub strip_extension ($filename) {
$filename =~ s/\.\w+$//; # remove extension
return $filename;
}
# sub convert_to_
sub convert_to_href_text ($human_text) {
$human_text =~ s/(\s|\(|=|\[)/-/g;
$human_text =~ s/([^a-zA-Z0-9_\-*:])//g;
return $human_text;
}
sub convert_local_path_to_href ($file) {
$file =~ s{\..+}{}g;
$file =~ s{/}{::}g;
return $file;
}
# More evil globals.
# Maps local filename to manpage name. Based on the =head1 NAME value.
my %file_manpage;
# All defined references.
my %references;
sub save_definition ($in_file, $refname, $display_name = $refname) {
$references{$display_name}{strip_extension($in_file)} =
convert_to_href_text($refname);
}
sub save_file_manpage ($filename, $manpage) {
$file_manpage{strip_extension($current_file)} = $manpage;
}
my $save_next_text_as_module_name = 0;
sub parse_entity ($element, $attrs, @subnodes) {
if ($element =~ /^head(\d)/) {
my $level = $1;
$current_heading = plaintext_of(@subnodes);
if ($level == 1 && (lc($current_heading) eq 'name')) {
$save_next_text_as_module_name = 1;
} elsif ($level == 2) {
save_definition ( $current_file, $current_heading );
}
} elsif ($save_next_text_as_module_name && $element =~ /^para$/i ) {
$save_next_text_as_module_name = 0;
$current_heading = plaintext_of(@subnodes);
$current_heading =~ m/^\s*(\S+)/;
my $firstword = $1;
save_file_manpage($current_file, $firstword);
}
}
sub parse_document ($, $, @entities) {
$current_heading = undef;
$save_next_text_as_module_name = 0;
foreach my $ent (@entities) {
parse_entity (@{$ent});
}
}
# Accept a list of module names. e.g., "Mojo*"
# From this get a list of filenames in the Perl directory
# Create a hash whose keys are all the (*.pm, *.pod) files under those pathnames.
# Give POD files preference: remove any x/y.pm where x/y.pod exists.
use File::Find::Rule;
use Mojo::Path;
# This expects the user to specify one or more directory paths.
# We will index all the *.pm and *.pod therein.
my @file_list;
foreach my $index_it (@ARGV) {
my $index_path = Mojo::Path->new($index_it);
my $index_dir = $index_path->to_dir;
my @files = File::Find::Rule->file()
->name( '*.pm', '*.pod' )
->in( $index_dir );
push @file_list, @files;
}
foreach my $parse_file (@file_list) {
$current_file = $parse_file;
my $pod = Pod::Simple::SimpleTree->new->parse_file($parse_file);
parse_document(@{$pod->root});
}
sub clean_heading ($original) {
# Clean headings for index display
$original =~ s/\smean\?$//;
if ($original =~ m/^((?:(?:who|what|when|where|why|how|is|are|a|an|do|does|don't|doesn't|not|I|need|to|about|did|my|the)\s+|error\s+"|message\s+")+)(.*)$/i) {
my ($prefix, $main) = ($1, ucfirst($2));
$main =~ s/[?"]//g;
# $prefix =~ s/[?"]//g;
return $main;
}
if ($original =~ m/^(\w+ing)\s+(a|an|the|some|any|all|to|from|your)?\b(.*)$/) {
my ($verb, $qualifier, $remainder) = ($1, $2, $3);
$qualifier ||= '';
# print ucfirst("$remainder, $verb $qualifier\n");
return ucfirst("$remainder, $verb $qualifier");
}
return $original;
}
foreach my $r (sort {fc($a) cmp fc($b)} keys %references) {
my $new = clean_heading($r);
if ($new ne $r) {
foreach my $orig_file (keys %{$references{$r}}) {
save_definition( $orig_file, $r, $new);
}
}
}
print "<html><head><title>Cross-reference</title></head><body>\n";
print "<dl>\n";
foreach my $r (sort {fc($a) cmp fc($b)} keys %references) {
print " <dt>$r</dt>\n<dd>";
my @sources;
foreach my $orig_file (sort {fc($a) cmp fc($b)} keys %{$references{$r}}) {
my $manpage = $file_manpage{$orig_file};
# my $href =
push @sources, qq(<a href="https://metacpan.org/pod/${manpage}#$references{$r}{$orig_file}">$manpage</a>);
}
print join(', ', @sources);
print "</dd>\n";
}
print "</dl>\n";
print "</body></html>\n";
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment