Skip to content

Instantly share code, notes, and snippets.

@gryftir
Created May 18, 2014 03:31
Show Gist options
  • Save gryftir/2b15c2d62c70d8431e4c to your computer and use it in GitHub Desktop.
Save gryftir/2b15c2d62c70d8431e4c to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use warnings;
use strict;
use WWW::Mechanize;
use HTML::TreeBuilder;
# Demo driver: fetch a page, parse it, and print its text twice -- first with
# the tree object's own as_text method, then with the local as_text() sub
# using a single space as the delimiter -- so the two outputs can be compared.

# The page text may contain non-ASCII characters. Without an encoding layer
# on STDOUT, printing them produces "Wide character in print" warnings (or
# raw Latin-1 bytes), so encode the output as UTF-8.
binmode STDOUT, ':encoding(UTF-8)';

my $url  = "http://www.google.com";
my $mech = WWW::Mechanize->new();
$mech->get($url);
my $tree = HTML::TreeBuilder->new_from_content( $mech->content() );

print $tree->as_text();
print "\n\n";
print as_text( $tree, delimiter => " " );
sub as_text {
    # Collect the text found under a parsed-HTML node, walking the tree
    # iteratively (explicit work list, no recursion).
    #
    # Arguments:
    #   $this    - root node: a hash ref (plain or blessed) whose '_tag' slot
    #              holds the element name and whose '_content' slot holds an
    #              array ref of children. Children are either plain strings
    #              (text) or further node refs.
    #   %options - optional named arguments:
    #       skip_dels => true  : also skip the content of 'del' elements
    #       delimiter => $str  : string appended after EVERY text piece,
    #                            including the last one (so the result ends
    #                            with the delimiter when any text was found)
    #
    # Returns the concatenated text. Content under 'style' and 'script'
    # elements is always skipped.
    my ( $this, %options ) = @_;
    my $skip_dels = $options{'skip_dels'} || 0;

    # Take the delimiter as given: only undef means "no delimiter". Using
    # "|| undef" here would silently discard a false-but-defined delimiter
    # such as "0".
    my $delimiter = $options{'delimiter'};

    my @pile = ($this);    # work list; children are pushed on the front
    my $text = '';

    while (@pile) {
        my $node = shift @pile;

        # Stray undef in a content list: nothing to emit.
        next unless defined $node;

        # Plain scalar: a text piece. Save it, followed by the delimiter.
        if ( !ref $node ) {
            $text .= $node;
            $text .= $delimiter if defined $delimiter;
            next;
        }

        # A node without a '_tag' is still traversed; default the tag to ''
        # so the comparisons below do not raise uninitialized-value warnings.
        my $tag = $node->{'_tag'};
        $tag = '' unless defined $tag;

        next
            if $tag eq 'style'
            or $tag eq 'script'
            or ( $skip_dels and $tag eq 'del' );

        # Put the children at the front so text comes out in document order.
        unshift @pile, @{ $node->{'_content'} || [] };
    }

    return $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment