Skip to content

Instantly share code, notes, and snippets.

@Vany
Created July 15, 2019 15:39
Show Gist options
  • Save Vany/8fbf038782cdf5bbd9a261f6f80cc525 to your computer and use it in GitHub Desktop.
Save Vany/8fbf038782cdf5bbd9a261f6f80cc525 to your computer and use it in GitHub Desktop.
fb2 missed links finder
use 5.030;
use Data::Dumper;
use Getopt::Long;
use XML::SAX;
# --- Script entry point ---------------------------------------------------
# Usage: <script> FILE
#
# Parses FILE with a SAX parser driven by FB2SAXHandler, then writes two
# Data::Dumper dumps to STDERR:
#   1. the statistics hash, including `missed_links` - the total number of
#      href occurrences whose value has no matching id in the document;
#   2. the unresolved href values themselves, with their occurrence counts.
my $infile = shift @ARGV;
die "Usage: $0 FILE\n" unless defined $infile;
die "Unexistent file" unless -f $infile;

# Keep our own reference to the handler rather than reading it back out of
# the parser object: the parser's internal hash layout is not something this
# script should depend on.
my $handler = FB2SAXHandler->new;
my $parser  = XML::SAX::ParserFactory->parser( Handler => $handler );
$parser->parse_uri($infile);

my ($stats, $refs, $links) = @{$handler}{qw(stats refs links)};

# A link "#name" is resolved when some attribute id="name" was seen; remove
# every such link in one slice so that only unresolved ones remain.
# Note: href values that do not have the "#id" form (for instance external
# URLs) can never match here and therefore stay in the report.
delete @{$links}{ map { '#' . $_ } keys %{$refs} };

# Always publish the counter, even when nothing is missing, so the dumped
# statistics have the same shape for every input.
my $missed = 0;
$missed += $_ for values %{$links};
$stats->{missed_links} = $missed;

warn Dumper($stats);
warn Dumper($links);
# SAX handler that gathers data about one parsed document in three hashes
# stored on the handler object:
#   stats - running counters (links, binary_length, raw_length, characters)
#   refs  - values of every attribute whose key ends in "}id"
#   links - values of every attribute whose key ends in "}href", mapped to
#           the number of times each value was seen
package FB2SAXHandler;
use utf8;
use 5.030;
use base qw(XML::SAX::Base);    # inherit from XML::SAX::Base
# Constructor: hands every argument, unchanged, to the parent class
# constructor and returns the object it builds. The per-document
# accumulators are not created here; start_document sets them up.
sub new {
    my ($class, @args) = @_;
    return $class->SUPER::new(@args);
}
# SAX callback fired at the start of a document: replaces the stats, refs
# and links accumulators with fresh empty hashes, discarding anything that
# was stored in them before. Returns the new (empty) links hash.
sub start_document {
    my ($self) = @_;
    $self->{$_} = {} for qw(stats refs);
    return $self->{links} = {};
}
# SAX callback for an opening tag.
#
# $el is the element description passed by the SAX driver; this code reads
# its Name and Attributes entries. Attributes, when present, is treated as a
# hash whose values are hashes carrying a Value entry.
#
# Effects on $self:
#   binary        - true exactly when this element is named "binary"
#   refs{VALUE}   - set to 1 for every attribute whose key ends in "}id"
#   links{VALUE}  - incremented for every attribute whose key ends in "}href"
#   stats{links}  - incremented once per href attribute
sub start_element {
    my ($self, $el) = @_;

    # Update the flag before looking at attributes: it has to follow every
    # start tag, including one delivered without an Attributes entry.
    $self->{binary} = $el->{Name} eq "binary";

    my $attrs = $el->{Attributes} or return;

    # keys() instead of each(): no iterator state is left behind on the
    # driver's hash, and iteration order is irrelevant for counting.
    for my $key (keys %{$attrs}) {
        if ($key =~ /}id$/) {
            $self->{refs}{ $attrs->{$key}{Value} } = 1;
        }
        elsif ($key =~ /}href$/) {
            $self->{stats}{links}++;
            $self->{links}{ $attrs->{$key}{Value} }++;
        }
    }
    return;
}

# SAX callback for a closing tag: leaving a "binary" element ends binary
# mode. Without this, text that follows "</binary>" (up to the next start
# tag) would still be added to binary_length by characters().
sub end_element {
    my ($self, $el) = @_;
    $self->{binary} = 0 if $el->{Name} eq "binary";
    return;
}
# SAX callback for a run of character data ($chunk->{Data}).
#
# While the binary flag on $self is set, only the length of the chunk is
# added to stats{binary_length}. Otherwise the chunk length is added to
# stats{raw_length} and the number of non-whitespace characters in it is
# added to stats{characters}.
sub characters {
    my ($self, $chunk) = @_;
    my $stats = $self->{stats};
    my $text  = $chunk->{Data};

    if ($self->{binary}) {
        $stats->{binary_length} += length($text);
        return;
    }

    $stats->{raw_length} += length($text);

    # Assigning the list of matches to an empty list and reading that in
    # scalar context yields the number of matches.
    my $visible = () = $text =~ /(\S)/gm;
    $stats->{characters} += $visible;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment