Skip to content

Instantly share code, notes, and snippets.

@dalang
Created May 11, 2013 17:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dalang/5560669 to your computer and use it in GitHub Desktop.
Save dalang/5560669 to your computer and use it in GitHub Desktop.
Parses saved Baidu Map result pages, extracts the site information for "苍南合作银行" branches, and writes that information to a file.
#! /usr/bin/perl
use HTML::Element;
use HTML::TreeBuilder;
use Encode;
#binmode DATA, "utf8";
#$tree->parse_file(\*DATA);
#print Dumper($tree), "\n";
use strict;
use warnings;

# Scan every HTML file named on the command line and pull out the argument
# string of the goFav(...) call found in the 'onclick' attribute of each
# result row's "poiFavBtn_<n>" element.  Each extracted string is written to
# hzyh.txt followed by ",\n" and echoed to STDOUT followed by "\n".
#
# Usage: perl <this script> page1.html [page2.html ...]
#
# NOTE(review): per the gist description the inputs are saved Baidu Map
# result pages; the script itself only relies on the table / tr / tid
# structure described below.
# NOTE(review): an earlier, disabled step stripped a bank-name prefix from
# each entry via Encode::decode; it is intentionally not reinstated here.

my $output_path = 'hzyh.txt';

# Three-argument open with a lexical handle; fail loudly instead of silently
# discarding every write when the file cannot be created.
open my $out, '>', $output_path
    or die "Cannot open $output_path for writing: $!";

foreach my $file_name (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; report it and
    # move on to the next input rather than treating it as an empty page.
    unless ($tree->parse_file($file_name)) {
        warn "Cannot parse $file_name: $!\n";
        $tree->delete;
        next;
    }

    foreach my $table ($tree->find_by_tag_name('table')) {

        # Index of the current result row within this table; the button's
        # 'tid' attribute is looked up as "poiFavBtn_<index>".
        my $row_counter = 0;

        foreach my $row ($table->find_by_tag_name('tr')) {

            # Only rows whose id contains "item-td-" are result rows; rows
            # without an id attribute are skipped as well.
            my $row_id = $row->attr('id');
            next unless defined $row_id && $row_id =~ /item-td-/;

            # Advance the index for every result row, even when the row is
            # skipped below, so later rows are still looked up by position.
            my $button_tid = 'poiFavBtn_' . $row_counter;
            $row_counter += 1;

            # Scalar-context look_down yields the first match or undef.
            my $button = $row->look_down('tid', $button_tid);
            next unless defined $button;

            my $onclick = $button->attr('onclick');
            next unless defined $onclick;

            if ($onclick =~ /goFav\((.*?)\);return/) {
                my $entry = $1;

                # Drop embedded line-break markup from the payload.
                $entry =~ s/<br\/>//g;

                print {$out} $entry . ",\n"
                    or die "Cannot write to $output_path: $!";
                print $entry . "\n";
            }
        }
    }

    # Explicitly release the parse tree once this file has been processed.
    $tree->delete;
}

# Buffered write errors surface at close, so check it.
close $out
    or die "Cannot close $output_path: $!";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment