Skip to content

Instantly share code, notes, and snippets.

@coolniikou
Created May 29, 2009 11:59
Show Gist options
  • Save coolniikou/119914 to your computer and use it in GitHub Desktop.
Save coolniikou/119914 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Web::Scraper;
use URI;
use YAML::Syck;
use Encode;
## Scrape artist and title from the amazon.com MP3 download ranking.
##
## $uri     - the bestseller page to fetch.
## $scraper - Web::Scraper definition: every node matched by the outer XPath
##            becomes one element of 'lists', and inside each such node the
##            text of '//strong/a' is stored as 'artist' and the text of
##            '//a[last()-1]' as 'title'.
my $uri = URI->new(
    'http://www.amazon.com/gp/bestsellers/dmusic/digital-music-track/ref=pd_ts_dmusic_nav'
);

my $scraper = scraper {
    process(
        '//div[2]//div//td[3]',
        'lists[]' => scraper {
            process( '//strong/a',    'artist' => 'TEXT' );
            process( '//a[last()-1]', 'title'  => 'TEXT' );
        },
    );
};
## Fetch the ranking page and run the scraper over it. The loop below reads
## the per-row results from the 'lists' key of the returned hash ref.
my $res = $scraper->scrape($uri);

## Feed skeleton. 'entries' starts out as an empty list so that the emitted
## YAML always carries the key, even when the scraper matched nothing.
my $feed = {
    title   => 'amazonmp3',
    link    => $uri->as_string,
    type    => 'amazon',
    entries => [],
};

## One feed entry per scraped row: the artist name is used as the entry
## title and the track title as the entry body.
for my $entry ( @{ $res->{lists} || [] } ) {
    push @{ $feed->{entries} }, {
        title => $entry->{artist},
        body  => $entry->{title},
    };
}

## YAML::Syck (loaded at the top of the file) and YAML both export a Dump()
## function by default. Call YAML's implementation by its full name, which is
## the one the later import resolved to, so the choice no longer depends on
## the order of the "use" lines.
use YAML;
binmode STDOUT, ":utf8";
print YAML::Dump($feed);
__END__
#!/usr/bin/perl
use strict;
use warnings;
use Web::Scraper;
use URI;
use YAML::Syck;
use utf8;
use Encode;
use WWW::Mechanize;
## Scrape artists and titles from the Yahoo! Music video chart.
##
## $uri   - first page of the chart (20 items per page, pageID=1).
## $mech  - WWW::Mechanize agent used for every request.
## @links - pagination links found on the first page, i.e. links whose URL
##          ends in "pageID=" followed by a single digit.
my $uri = 'http://new.music.yahoo.com/chart/?itemtype=video&gname=All+Categories&genreID=0&count=20&pageID=1';

# Direct method call instead of the indirect-object form "new WWW::Mechanize",
# which Perl can mis-parse.
my $mech = WWW::Mechanize->new;
$mech->get($uri);

# Several links on the page may point to the same chart page. Keep only the
# first link per absolute URL so that no page is fetched and scraped twice.
# NOTE(review): the first page itself is only scraped if it carries a link
# matching the pattern for pageID=1 -- confirm against the live markup.
my @links = do {
    my %seen;
    grep { !$seen{ $_->url_abs->as_string }++ }
        $mech->find_all_links( url_regex => qr/pageID\=\d{1}$/ );
};
## Web::Scraper definition for one chart page.
##
## Every <div class="ymusic-charts-metadata"> becomes one element of
## 'entries'. Inside each such div the text of the title link is stored as
## 'title' and the text of the subtitle link as 'artist'. The final "result"
## call selects the 'entries' value as what scrape() hands back.
my $s = scraper {
    process(
        '//div[@class="ymusic-charts-metadata"]',
        'entries[]' => scraper {
            process(
                '//a[@class="ymusic-link-title ymusic-ellipsis"]',
                'title' => 'TEXT',
            );
            process(
                '//a[@class="ymusic-link-subtitle ymusic-ellipsis"]',
                'artist' => 'TEXT',
            );
        },
    );
    result 'entries';
};
## Feed skeleton. 'entries' starts out as an empty list so that the emitted
## YAML always carries the key, even when no page yields a match.
my $feed = {
    title   => 'yahoochart',
    link    => $uri,
    type    => 'yahoochart',
    entries => [],
};

## Visit every pagination link, scrape the page, and append one feed entry
## per chart row: the artist is used as the entry title and the video title
## as the entry body.
for my $page_link (@links) {
    $mech->get($page_link);

    # The page URI is passed along with the HTML so the scraper knows the
    # base the document came from.
    my $contents = $s->scrape( $mech->content, $mech->uri );

    # Fall back to an empty list when the page produced no matches, so the
    # dereference cannot fail on an undefined result.
    for my $entry ( @{ $contents || [] } ) {
        push @{ $feed->{entries} }, {
            title => $entry->{artist},
            body  => $entry->{title},
        };
    }
}

## YAML::Syck (loaded at the top of the script) and YAML both export a Dump()
## function by default. Call YAML's implementation by its full name, which is
## the one the later import resolved to, so the choice no longer depends on
## the order of the "use" lines.
use YAML;
binmode STDOUT, ":utf8";
print YAML::Dump($feed);
__END__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment