Skip to content

Instantly share code, notes, and snippets.

@visar
Created January 5, 2014 14:33
Show Gist options
  • Save visar/8268883 to your computer and use it in GitHub Desktop.
Save visar/8268883 to your computer and use it in GitHub Desktop.
Perl script for downloading multiple PDFs from a web page and merging them into a single PDF
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Mechanize;
use File::Basename;
use LWP::Simple;
use PDF::API2;
# Download every PDF linked from the index page into $output_dir and append
# their pages, in link order, to a single merged PDF in the same directory.
#
# A link that cannot be downloaded or parsed is reported on STDERR and
# skipped, so one bad file does not abort the whole merge.
my $index_url  = "http://www.cs.cmu.edu/~guna/15-123S11/Lectures";
my $output_dir = "pdfs";

my $page = WWW::Mechanize->new();
$page->get($index_url);
my @pdfs = $page->find_all_links(
    tag       => "a",
    url_regex => qr/\.pdf$/
);

# mkdir returns false both when the directory already exists and on a real
# failure; only the latter should stop the script.
if ( !-d $output_dir ) {
    mkdir $output_dir
        or die "Cannot create directory '$output_dir': $!\n";
}

my $merged_file  = "$output_dir/merged.pdf";
my $merged_pdf   = PDF::API2->new( -file => $merged_file );
my $merged_pages = 0;

LINK:
foreach my $link (@pdfs) {
    my $url       = $link->url_abs();
    my $localfile = basename($url);
    my $localpath = "$output_dir/$localfile";

    # A linked file with the same name as the output would be stored on top
    # of the path the merged document is saved to.
    if ( $localpath eq $merged_file ) {
        warn "Skipping $url: its local name collides with $merged_file\n";
        next LINK;
    }

    # getstore returns the HTTP status code; it does not die on failure.
    my $status = getstore( $url, $localpath );
    if ( !is_success($status) ) {
        warn "Skipping $url: download failed with HTTP status $status\n";
        next LINK;
    }

    # open() dies on files it cannot parse (e.g. an HTML error page saved
    # under a .pdf name), so trap that and move on to the next link.
    my $input_pdf = eval { PDF::API2->open($localpath) };
    if ( !$input_pdf ) {
        my $reason = $@ || "unknown error\n";
        warn "Skipping $localpath: cannot open as PDF: $reason";
        next LINK;
    }

    foreach my $numpage ( 1 .. $input_pdf->pages() ) {
        # Target page number 0 appends the imported page at the end.
        $merged_pdf->importpage( $input_pdf, $numpage, 0 );
        $merged_pages++;
    }
}

if ($merged_pages) {
    $merged_pdf->save();
}
else {
    warn "No PDF pages were collected from $index_url; "
        . "$merged_file was not written\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment