Skip to content

Instantly share code, notes, and snippets.

@adamtaylor
Created October 14, 2013 07:23
Show Gist options
  • Save adamtaylor/6972032 to your computer and use it in GitHub Desktop.
Save adamtaylor/6972032 to your computer and use it in GitHub Desktop.
gumtree bike scraper
#!perl
use warnings;
use strict;
use DDP;
use Web::Scraper;
use URI;
use Email::Sender;
# Collects every ad hash produced by scrape(), across all result pages.
my @bikes = ();

# Search-results page the crawl starts from: query "felt" in the bicycles
# category, United Kingdom, with no price bounds.
my $start_url
    = 'http://www.gumtree.com/search?q=felt&search_location=United+Kingdom&category=bicycles&min_price=&max_price=';

scrape($start_url);
# Scrape a Gumtree search-results page and every following page reachable
# through its "next" pagination link.
#
# On each page, every "li.hlisting" element is turned into an ad hash with the
# keys title, img, description and url. Each ad is appended to the file-level
# @bikes array and its four fields are printed to STDOUT, one per line (a
# missing field is printed as an empty line). When a page has a next-page
# link, that link is printed on its own line before the next page is fetched.
#
# Parameters:
#   $uri - address of the first results page; passed through URI->new.
#
# Returns: nothing (bare return).
sub scrape {
    my $uri = shift;

    # Build the scraper once and reuse it for every page instead of
    # re-creating it per page.
    my $gumtree = scraper {
        process "li.hlisting", "ads[]" => scraper {
            process "h3",              "title"       => 'TEXT';
            process ".thumbnail",      "img"         => '@src';
            process ".ad-description", "description" => 'TEXT';
            process ".description",    "url"         => '@href';
        };
        process "nav#pagination ul li.pag-next a", "next_page" => '@href';
    };

    # Follow pagination with a loop rather than recursion, so a long result
    # set neither grows the call stack nor triggers "Deep recursion" warnings.
    while (1) {
        my $res = $gumtree->scrape( URI->new($uri) );

        # A page with no matching ads may leave the "ads" key unset.
        foreach my $ad ( @{ $res->{ads} || [] } ) {
            push @bikes, $ad;

            # Any selector may fail to match inside an ad; print an empty
            # line for it instead of concatenating undef under warnings.
            foreach my $field (qw(title img description url)) {
                my $text = defined $ad->{$field} ? $ad->{$field} : '';
                print $text . "\n";
            }
        }

        # No next-page link means this was the last page.
        my $next_page = $res->{next_page};
        last unless $next_page;

        # Terminate the line so the link is not fused with the next ad title.
        print "$next_page\n";
        $uri = $next_page;
    }

    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment