Skip to content

Instantly share code, notes, and snippets.

@lopnor
Created April 27, 2009 13:52
Show Gist options
  • Save lopnor/102495 to your computer and use it in GitHub Desktop.
Save lopnor/102495 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Web::Scraper;
use LWP::Simple ();
use YAML;
use URI;
binmode(STDOUT, ':utf8');
my $cat = scraper {
process '//div[@class="lnaviSl"]//li/a' => 'cat[]' => '@href';
result 'cat';
}->scrape(URI->new('http://imode-press.jp/imode/top/sitelist/03page205.htm'));
for my $c (@$cat) {
my @subcat = find_subcat($c);
for my $sc (@subcat) {
my @sites = get_siteinfo($sc);
printf "%s\t%s\t%s\t%s\n", $sc, $_->{name}, $_->{qrcode}, decodeqr($_->{qrcode}) for @sites;
}
}
sub get_siteinfo {
my $url = shift;
my $result = scraper {
process '//div[@class="siteBoxAll"]' => 'site[]' => scraper {
process '//img[@height="75"]' => qrcode => '@src';
process '//strong' => name => 'TEXT';
};
process '//div[@class="slSubNavi02"]/span[@class="smallTxt"]/a[text()="次ページ"]', 'nextpage' => '@href';
}->scrape($url);
# my @ret = map {$_->{qrcode}} @{$result->{site}};
my @ret = @{$result->{site}};
push @ret, get_siteinfo($result->{nextpage}) if ($result->{nextpage});
return @ret;
}
sub find_subcat {
my $url = shift;
my @ret;
my $sc = scraper {
process '//div[@class="slSubNavi01"]/table//a' => 'subcat[]' => '@href';
result 'subcat';
}->scrape($url);
for (@$sc) {
my @sub = find_subcat($_);
push @ret, (@sub ? @sub : $_);
}
return @ret;
}
sub decodeqr {
my $qrcode = shift;
my $url = LWP::Simple::get("http://kitchen.soffritto.org/~danjou/decodeqr.cgi?$qrcode");
chomp $url;
return $url;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment