Skip to content

Instantly share code, notes, and snippets.

@takatoshiono
Created January 10, 2009 10:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takatoshiono/45437 to your computer and use it in GitHub Desktop.
Save takatoshiono/45437 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# Download PICS from HTML link in specified page.
# http://fetinavi.blog9.fc2.com/
use strict;
use warnings;
use HTML::LinkExtor;
use IO::File;
use LWP::UserAgent;
use URI;
# Tunables for the downloader.
use constant {
    OUT_DIR           => './',  # Destination directory; must end with a path separator.
    DOWNLOAD_INTERVAL => 1,     # Seconds to pause after each download attempt.
};

# Image links collected by callback() while the source page is parsed.
my @URLS;

#my $file = shift;
#my $content = file2content($file);
my $src_url = shift or die "Usage: $0 url\n";

# url2content() already reports the HTTP status on STDERR and returns nothing
# on failure; stop here instead of handing undef to the parser.
my $content = url2content($src_url);
defined $content or die "Cannot fetch $src_url\n";

my $extor = HTML::LinkExtor->new(\&callback);
$extor->parse($content);
$extor->eof;    # Signal end of document so buffered markup is flushed.

foreach my $href (@URLS) {
    # Links may be relative; resolve them against the page they came from.
    my $url = URI->new_abs($href, $src_url)->as_string;
    print "$url\n";

    my $image = url2content($url);
    next unless defined $image && length $image;

    # The file name is the last path segment of the URL.
    my ($file) = $url =~ m{/([^/]+)\z};
    unless (defined $file) {
        print STDERR "Cannot derive a file name from $url\n";
        next;
    }

    my $path = OUT_DIR . $file;

    # '>' creates the file or truncates an existing one, so a shorter download
    # never leaves stale bytes from an earlier run at the end of the file.
    my $fh = IO::File->new($path, '>') or die "Cannot open $path: $!\n";
    $fh->binmode;    # Image data must be written without newline translation.
    print {$fh} $image or die "Cannot write $path: $!\n";
    $fh->close or die "Cannot close $path: $!\n";
}
continue {
    # Runs after every iteration, including those skipped with "next".
    sleep(DOWNLOAD_INTERVAL);
}
# Read the file named by the first argument and return its entire contents
# as a single string. Dies with a "Cannot open ..." message if the file
# cannot be opened.
sub file2content {
    my ($file) = @_;

    my $in = IO::File->new($file);
    die "Cannot open $file: $!" unless $in;

    # With the input record separator undefined, one read returns everything.
    local $/ = undef;
    my $text = <$in>;
    return $text;
}
# Shared LWP::UserAgent, created on the first url2content() call and reused
# by every later call.
my $_ua;

# Fetch $url with an HTTP GET.
#
# Returns the response body (as given by $res->content) on success. On
# failure, prints the response status line to STDERR and returns nothing
# (undef in scalar context).
sub url2content {
    my $url = shift;

    # No "my" here: declaring a new lexical would shadow the file-level $_ua
    # and build a fresh user agent on every call, defeating the cache.
    $_ua ||= LWP::UserAgent->new(agent => 'Mozilla/5.0');

    my $res = $_ua->get($url);
    if ($res->is_success) {
        return $res->content;
    }
    else {
        print STDERR $res->status_line, "\n";
        return;
    }
}
# Link handler passed to HTML::LinkExtor->new. Receives a tag name followed
# by that tag's attribute name/value pairs, and appends the href of every
# <a> tag whose href ends in ".jpeg" or ".jpg" to @URLS.
sub callback {
    my ($tag, %attr) = @_;

    # Only anchor tags are of interest.
    return unless $tag eq 'a';

    my $href = $attr{href};
    return unless $href;

    push @URLS, $href if $href =~ /\.(jpeg|jpg)$/;
    #print "$tag : $attr{href}\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment