alexm/table_extract.pl

## table_extract.pl
#!/usr/bin/env perl

use strict;
use warnings;
use HTML::TableExtract;

# HTML::TableExtract also copes with malformed markup: none of the
# <table>, <tr> or <td> elements in this sample are ever closed.
my $html = "<table><tr><td>foo<td>bar<tr><td>42<td>3.14\n";

my $te = HTML::TableExtract->new;
$te->parse($html);
my @tables = $te->tables;

# Walk the 2x2 grid of the first table in row-major order and print
# every cell on a line of its own.
for my $row (0, 1) {
    for my $col (0, 1) {
        print $tables[0]->cell($row, $col), "\n";
    }
}

## table_extract_to_csv.pl
#!/usr/local/bin/perl
use strict;
use warnings;
use utf8::all;
use v5.10;
use LWP::Simple;
use HTML::TableExtract;
use String::Util qw< trim >;
use Text::CSV;

# Download a page and write its second table to STDOUT as two CSV records:
# the first record holds the trimmed first-column cells, the second record
# the trimmed second-column cells (the two columns are emitted as two rows).
my $page = "http://www.tuixent-lavansa.com/pagina/inicial";

# LWP::Simple::get returns undef when the request fails; stop with a clear
# message instead of handing undef to the parser.
my $html = get( $page );
die "Could not fetch $page" unless defined $html;

my $te = HTML::TableExtract->new();
$te->parse($html);
my @tables = $te->tables;

# The script reads the table at index 1, so at least two tables must have
# been found; otherwise the method call below would die on undef.
if ( @tables < 2 ) {
    die "Expected at least 2 tables in $page, found " . scalar(@tables);
}

my @rows = $tables[1]->rows;
shift @rows;    # drop the first row (presumably a header -- verify against the page)

my @titles = map { trim( $_->[0] ) } @rows;
my @values = map { trim( $_->[1] ) } @rows;

# Text::CSV->new returns undef on bad options; error_diag explains why.
my $csv = Text::CSV->new ( { binary => 1, eol => $/ } )
    or die "Cannot create Text::CSV object: " . Text::CSV->error_diag();

# print returns a false value when a record cannot be written.
$csv->print( \*STDOUT, \@titles )
    or die "Cannot write titles record: " . $csv->error_diag();
$csv->print( \*STDOUT, \@values )
    or die "Cannot write values record: " . $csv->error_diag();
	#!/usr/bin/env perl

	use strict;
	use warnings;
	use HTML::TableExtract;

	# HTML::TableExtract also copes with malformed markup: none of the
	# <table>, <tr> or <td> elements in this sample are ever closed.
	my $html = "<table><tr><td>foo<td>bar<tr><td>42<td>3.14\n";

	my $te = HTML::TableExtract->new;
	$te->parse($html);
	my @tables = $te->tables;

	# Walk the 2x2 grid of the first table in row-major order and print
	# every cell on a line of its own.
	for my $row (0, 1) {
	    for my $col (0, 1) {
	        print $tables[0]->cell($row, $col), "\n";
	    }
	}
	#!/usr/local/bin/perl
	use strict;
	use warnings;
	use utf8::all;
	use v5.10;
	use LWP::Simple;
	use HTML::TableExtract;
	use String::Util qw< trim >;
	use Text::CSV;

	# Download a page and write its second table to STDOUT as two CSV records:
	# the first record holds the trimmed first-column cells, the second record
	# the trimmed second-column cells (the two columns are emitted as two rows).
	my $page = "http://www.tuixent-lavansa.com/pagina/inicial";

	# LWP::Simple::get returns undef when the request fails; stop with a clear
	# message instead of handing undef to the parser.
	my $html = get( $page );
	die "Could not fetch $page" unless defined $html;

	my $te = HTML::TableExtract->new();
	$te->parse($html);
	my @tables = $te->tables;

	# The script reads the table at index 1, so at least two tables must have
	# been found; otherwise the method call below would die on undef.
	if ( @tables < 2 ) {
	    die "Expected at least 2 tables in $page, found " . scalar(@tables);
	}

	my @rows = $tables[1]->rows;
	shift @rows;    # drop the first row (presumably a header -- verify against the page)

	my @titles = map { trim( $_->[0] ) } @rows;
	my @values = map { trim( $_->[1] ) } @rows;

	# Text::CSV->new returns undef on bad options; error_diag explains why.
	my $csv = Text::CSV->new ( { binary => 1, eol => $/ } )
	    or die "Cannot create Text::CSV object: " . Text::CSV->error_diag();

	# print returns a false value when a record cannot be written.
	$csv->print( \*STDOUT, \@titles )
	    or die "Cannot write titles record: " . $csv->error_diag();
	$csv->print( \*STDOUT, \@values )
	    or die "Cannot write values record: " . $csv->error_diag();