Skip to content

Instantly share code, notes, and snippets.

@alexm
Last active December 28, 2015 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexm/7577331 to your computer and use it in GitHub Desktop.
Save alexm/7577331 to your computer and use it in GitHub Desktop.
HTML::TableExtract proof of concept
#!/usr/bin/env perl
use strict;
use warnings;
use HTML::TableExtract;
# HTML::TableExtract can parse badly formed HTML too: the markup below
# never closes its <td>, <tr> or <table> elements.
my $markup = "<table><tr><td>foo<td>bar<tr><td>42<td>3.14\n";

my $extractor = HTML::TableExtract->new();
$extractor->parse($markup);

my @found       = $extractor->tables;
my $first_table = $found[0];

# Print the 2x2 grid one cell per line, in row-major order.
for my $row ( 0 .. 1 ) {
    for my $col ( 0 .. 1 ) {
        print $first_table->cell( $row, $col ), "\n";
    }
}
#!/usr/local/bin/perl
use strict;
use warnings;
use utf8::all;
use v5.10;
use LWP::Simple;
use HTML::TableExtract;
use String::Util qw< trim >;
use Text::CSV;
# Fetch the page, take the second table found in it, and write that table
# to STDOUT as two CSV records: the first column of every remaining row,
# then the second column of every remaining row.
my $page = "http://www.tuixent-lavansa.com/pagina/inicial";

# LWP::Simple::get returns undef on any failure; stop here instead of
# handing undef to the parser.
my $html = get( $page );
die "Could not fetch $page\n" unless defined $html;

my $te = HTML::TableExtract->new();
$te->parse($html);

# The script reads the table at index 1, so at least two tables must
# have been extracted.
my @tables = $te->tables;
die "Expected at least 2 tables in $page, found " . scalar(@tables) . "\n"
    if @tables < 2;

my @rows = $tables[1]->rows;

# Discard the first row (presumably a header row -- TODO confirm against
# the live page).
shift @rows;

my @titles = map { trim( $_->[0] ) } @rows;
my @values = map { trim( $_->[1] ) } @rows;

my $csv = Text::CSV->new ( { binary => 1, eol => $/ } )
    or die "Cannot create Text::CSV object: " . Text::CSV->error_diag() . "\n";

# Emit the titles record first, then the values record, failing loudly if
# either write is rejected.
for my $record ( \@titles, \@values ) {
    $csv->print( \*STDOUT, $record )
        or die "Cannot write CSV record: " . $csv->error_diag() . "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment