Skip to content

Instantly share code, notes, and snippets.

@chankeypathak
Last active August 29, 2015 14:07
Show Gist options
  • Save chankeypathak/94338d2459644f4eec4c to your computer and use it in GitHub Desktop.
Save chankeypathak/94338d2459644f4eec4c to your computer and use it in GitHub Desktop.
Scraping cromaretail
#!/usr/bin/env perl
use 5.010;
use open qw(:locale);
use strict;
use utf8;
use warnings qw(all);
use Mojo::UserAgent;
# Queue of pages still to be fetched, each wrapped in a Mojo::URL object.
my @urls = map { Mojo::URL->new($_) } qw(
    http://www.cromaretail.com/Mobile-Phones-c-10.aspx
);

# Upper bound on the number of requests in flight at the same time.
my $max_conn = 4;

# User agent that follows up to 5 redirects and picks up proxy settings
# via proxy->detect.
my $ua = Mojo::UserAgent->new(max_redirects => 5);
$ua->proxy->detect;

# Number of requests started but not yet completed; get_callback decrements it.
my $active = 0;

# Scheduler: on every tick of a zero-interval recurring timer, top the pool
# of in-flight requests back up to $max_conn from the queue.
Mojo::IOLoop->recurring(
    0 => sub {
        while ($active < $max_conn) {
            my $url = shift @urls;

            unless ($url) {
                # Queue is empty: stop the loop once the last request is done,
                # otherwise keep ticking until the outstanding ones finish.
                Mojo::IOLoop->stop unless $active;
                return;
            }

            ++$active;
            $ua->get($url => \&get_callback);
        }
        return;
    }
);

# Run the event loop unless something else has already started it.
Mojo::IOLoop->start unless Mojo::IOLoop->is_running;
# Completion handler passed to $ua->get for every request.
#
# Arguments:
#   (ignored) - first callback argument, not used here
#   $tx       - the finished transaction
#
# Decrements the file-level $active counter, then forwards 2xx responses
# whose Content-Type starts with text/html to parse_html(). Every other
# response is dropped silently. Always returns nothing.
sub get_callback {
    my (undef, $tx) = @_;

    # Free the slot first, so the scheduler can start another request no
    # matter whether this response turns out to be usable.
    --$active;

    my $res = $tx->res;

    # Check the numeric status directly rather than through a helper method,
    # so the test does not depend on the installed Mojolicious version.
    # The code is undefined when no response was received at all.
    my $code = $res->code // 0;
    return if $code < 200 || $code > 299;

    # The Content-Type header may be missing; default to '' so the match
    # below does not warn about an uninitialized value.
    my $type = $res->headers->content_type // '';
    return if $type !~ m{^text/html\b}ix;

    # Pass the request URL along with the transaction.
    parse_html($tx->req->url, $tx);
    return;
}
# Print the device names, product links and prices found in a response.
#
# Arguments:
#   $url - request URL of the page (accepted from the caller, currently unused)
#   $tx  - finished transaction whose response body is the HTML to scan
#
# Writes three labelled sections to STDOUT, one item per line, followed by
# a blank line. Returns nothing.
sub parse_html {
    my ($url, $tx) = @_;

    my $dom = $tx->res->dom;

    # Each list is built with an explicit map() over the matched elements and
    # an explicit join(), instead of calling the element method on the
    # collection itself and printing the collection object directly; the
    # latter relies on collection autoloading/stringification that newer
    # Mojolicious releases do not provide.

    # Print the mobile names
    say 'Devices are....';
    say $dom->find('h2 > a')->map('text')->join("\n");

    # Print the links of mobiles
    say 'Links are....';
    say $dom->find('h2 > a')->map(attr => 'href')->join("\n");

    # Print the price of mobiles
    say 'Prices are....';
    say $dom->find('article > h3')->map('text')->join("\n");

    say '';
    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment