Skip to content

Instantly share code, notes, and snippets.

@gustawdaniel
Last active February 14, 2017 02:36
Show Gist options
  • Save gustawdaniel/a4bb55473e8e4399a5b087f1979e78d0 to your computer and use it in GitHub Desktop.
Save gustawdaniel/a4bb55473e8e4399a5b087f1979e78d0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use warnings;
use strict;
use HTML::TagParser;
# For every post linked from the blog index page, print one table row with:
#   - the number of characters of text found in heading/list/paragraph tags,
#   - the number of characters of text found in <pre> tags (code),
#   - the post title (inner text of the index page's <h2>).
my $url = 'http://blog.gustawdaniel.pl';

# Space-separated tag names whose inner text is concatenated and measured.
# The first entry feeds the "text" column, the second the "code" column.
my @tags = ("h1 h2 h3 h4 li p", "pre");

print "| text | code | title \n";

# Every <h2> on the index page is treated as a post heading whose first
# child carries the link to the post.
my @list = HTML::TagParser->new($url)->getElementsByTagName("h2");

foreach my $elem (@list) {
    # A heading without a child element, or whose first child has no href,
    # cannot be followed; report it and move on instead of dying on an
    # undefined value.
    my $link = $elem->firstChild();
    my $href = defined $link ? $link->getAttribute("href") : undef;
    unless ( defined $href && length $href ) {
        warn "Skipping <h2> without a linked post\n";
        next;
    }

    # An href that already carries a scheme is a complete URL; only
    # site-relative hrefs are appended to the base URL.
    my $post_url = $href =~ m{\A [a-z][a-z0-9+.-]* :// }xmsi
        ? $href
        : $url . $href;
    my $post = HTML::TagParser->new($post_url);

    # One character count per entry of @tags, in the same order.
    my @counts;
    for my $tag_group (@tags) {
        my @elements =
            map { $post->getElementsByTagName($_) } split / /, $tag_group;
        push @counts, length( join( "", map { $_->innerText } @elements ) );
    }

    # NOTE(review): innerText appears able to return nothing for an empty
    # element; default to "" so printf always receives three arguments.
    my $title = $elem->innerText;
    $title = "" unless defined $title;

    printf( "| %8d | %8d | %-60s \n", @counts, $title );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment