Created
May 11, 2013 17:18
-
-
Save dalang/5560669 to your computer and use it in GitHub Desktop.
Parse saved Baidu Maps result pages, extract the site information for "苍南合作银行" branches, and write that information to a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl
#
# Extract point-of-interest records from saved map result pages.
#
# Usage: pass one or more HTML file names on the command line.
#
# For every <tr> whose id contains "item-td-", the script looks inside the
# row for the element whose "tid" attribute is "poiFavBtn_<N>", where N is
# the zero-based index of the matching row within its table. The argument
# text of the goFav(...) call in that element's onclick handler is taken,
# "<br/>" markers are stripped from it, and the result is
#   * appended to hzyh.txt followed by ",\n", and
#   * echoed to STDOUT followed by "\n".
use strict;
use warnings;

use HTML::Element;
use HTML::TreeBuilder;
use Encode;

# Output file; opened without an encoding layer, so text is written as-is.
my $output_name = 'hzyh.txt';
open(my $out, '>', $output_name)
    or die "Cannot open $output_name for writing: $!\n";

foreach my $file_name (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # Report unreadable input instead of silently treating it as an
    # empty document.
    if (!defined $tree->parse_file($file_name)) {
        warn "Cannot parse $file_name: $!\n";
        $tree->delete;
        next;
    }

    # Debugging aid kept from the original (the value of the _parent
    # attribute is the html node):
    #   print $head->attr('_parent')->as_text();

    foreach my $table ($tree->find_by_tag_name('table')) {
        # Index of the current matching row; it is part of the button's
        # tid, so it restarts at 0 for every table.
        my $row_counter = 0;

        foreach my $row ($table->find_by_tag_name('tr')) {
            # Only rows whose id contains "item-td-" are of interest;
            # rows without an id are skipped as well.
            my $row_id = $row->attr('id');
            next unless defined $row_id && $row_id =~ /item-td-/;

            my $tid = 'poiFavBtn_' . $row_counter;
            my ($button) = $row->look_down('tid', $tid);

            if (!defined $button) {
                # Previously this case aborted the whole run with
                # "Can't call method on an undefined value".
                warn "$file_name: no element with tid '$tid' in row '$row_id'\n";
            }
            else {
                my $onclick = $button->attr('onclick');
                if (defined $onclick && $onclick =~ /goFav\((.*?)\);return/) {
                    my $entry = $1;

                    # Disabled experiment kept for reference: strip the
                    # bank name ("苍南农村合作银行", Cangnan Rural
                    # Cooperative Bank) after decoding it from GBK.
                    #   my $str = decode("gbk", $dat);
                    #   $entry =~ s/$str//g;

                    $entry =~ s/<br\/>//g;

                    print {$out} $entry . ",\n"
                        or die "Cannot write to $output_name: $!\n";
                    print $entry . "\n";
                }
            }

            # NOTE(review): the index advances even when the button is
            # missing, assuming tids are numbered per row -- confirm
            # against real pages.
            $row_counter += 1;
        }
    }

    # Release the tree explicitly, as the original did.
    $tree = $tree->delete;
}

# Buffered write errors surface here, so the result must be checked.
close($out) or die "Cannot close $output_name: $!\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment