Skip to content

Instantly share code, notes, and snippets.

@mheiges
Created June 3, 2015 19:38
Show Gist options
  • Save mheiges/c6cd6a2cfe9daaaf330d to your computer and use it in GitHub Desktop.
Save mheiges/c6cd6a2cfe9daaaf330d to your computer and use it in GitHub Desktop.
Download Oracle JDK RPM files
#!/usr/bin/perl
# Scrape Oracle JDK web pages for RPM links. Download those RPM files.
# Deletes downloaded files that do not pass rpm checksum (e.g. incomplete downloads).
# Skips downloading files already on the local filesystem.
use strict;
use sigtrap qw(handler exit_handler normal-signals error-signals);
# --- Configuration ---------------------------------------------------------
# Architecture tag; only files whose names end in "${ARCH}.rpm" are fetched.
my $ARCH = 'x64';
# Directory the RPM files are saved to. It must already exist.
my $DOWNLOAD_DIR = '/home/mheiges/tmp';
# Paths of every file whose download was started during this run. The signal
# handler walks this list to re-validate files when the script is interrupted.
my @rpms_downloaded;
# URLs for each major JDK version that lists the jdk files for download.
# The specific urls for direct downloads of RPM files will be scraped from these.
my @jdk_version_intro_pages = qw(
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html
http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
);

# --- Main ------------------------------------------------------------------
# Fail fast on a bad download directory instead of letting every transfer
# go wrong later with a less obvious error.
unless ( -d $DOWNLOAD_DIR && -w $DOWNLOAD_DIR ) {
    die "Download directory '$DOWNLOAD_DIR' does not exist or is not writable\n";
}

# Scrape pages for RPM download urls and then download the files.
for my $page_url (@jdk_version_intro_pages) {
    for my $dl_url ( extract_dl_urls($page_url) ) {
        download_rpm_file($dl_url);
    }
}
exit 0;
# Get the list of direct download urls for RPMs from one JDK listing page.
#
# Parameters:
#   $page_url - URL of a page that lists the JDK files for download.
# Returns:
#   List (possibly empty) of the "filepath" values found on every line of
#   the page that mentions downloads['jdk and "${ARCH}.rpm".
# Dies if curl cannot be started. Warns if curl exits unsuccessfully or if
# no URL was found.
sub extract_dl_urls {
    my ($page_url) = @_;
    my @dl_urls;

    # List-form pipe open: curl is executed directly, without a shell, so
    # the URL cannot be mangled or interpreted by shell quoting rules.
    open( my $curl, '-|', 'curl', '-s', $page_url )
        or die "Cannot run curl for ${page_url}: $!\n";

    while ( defined( my $line = <$curl> ) ) {
        # Same two filters the former grep pipeline applied, done in Perl.
        next unless $line =~ m/downloads\['jdk/;
        next unless $line =~ m/\Q$ARCH\E\.rpm/;

        my ($dl_url) = $line =~ m/filepath[:\"]+([^\"]+)/i;
        # A candidate line without a filepath value has nothing to download;
        # skipping it keeps undef out of the returned list.
        next unless defined $dl_url;
        push @dl_urls, $dl_url;
    }

    # close() on a pipe waits for the child and is false on non-zero exit.
    close($curl)
        or warn "WARNING: curl did not exit cleanly for ${page_url} (status $?)\n";

    if ( scalar @dl_urls == 0 ) {
        warn "WARNING: No downloads URLs found on\n${page_url} .\n";
        warn "Verify the page is valid and this script's grep for urls is up to date.\n";
    }
    return @dl_urls;
}
# Download one RPM file into $DOWNLOAD_DIR and validate it afterwards.
#
# Parameters:
#   $url - direct download URL; the text after its last "/" is used as the
#          local file name.
# Returns nothing. The URL is skipped when it is undefined, has no file name
# component, names a file that does not end in "${ARCH}.rpm", or when the
# file is already on the local filesystem.
# Dies if curl cannot be started; warns if curl exits unsuccessfully.
sub download_rpm_file {
    my ($url) = @_;
    if ( !defined $url ) {
        print "skipping undefined url\n";
        return;
    }

    my ($filename) = $url =~ m/^.+\/(.+)$/;
    if ( !defined $filename ) {
        print "skipping url without a file name: $url\n";
        return;
    }
    if ( $filename !~ m/${ARCH}\.rpm$/ ) {
        print "skipping unexpected name: $filename\n";
        return;
    }

    my $filepath = "${DOWNLOAD_DIR}/${filename}";
    return if ( -f $filepath );

    # Recorded before the transfer starts, so that a download interrupted
    # by a signal is still in the list the signal handler validates.
    push @rpms_downloaded, $filepath;
    print "downloading $filename ...\n";

    # Only -o is given (the former extra -O competed with it for the single
    # URL), and the target is an absolute path so no "cd" is needed.
    # NOTE(review): -k turns off TLS certificate verification; it is kept to
    # preserve existing behaviour, but consider removing it.
    my @curl_cmd = (
        'curl', '-s', '-j', '-k', '-L',
        '-H', 'Cookie: oraclelicense=accept-securebackup-cookie',
        '-o', $filepath,
        $url,
    );

    # A list-form pipe open runs curl without a shell. It is used instead of
    # system() because system() ignores SIGINT/SIGQUIT in the parent while
    # the child runs, which would bypass this script's signal handler.
    open( my $curl, '-|', @curl_cmd )
        or die "Cannot run curl for ${url}: $!\n";
    # close() waits for curl to finish and is false on non-zero exit.
    close($curl)
        or warn "WARNING: curl did not exit cleanly for ${url} (status $?)\n";

    # Validation deletes the file again if the transfer was incomplete.
    validate_rpm_file($filepath);
    return;
}
# Signal handler registered through "use sigtrap" for the normal-signals and
# error-signals groups. Re-validates every file whose download was started
# in this run, then terminates the script.
#
# The explicit exit is required: a custom sigtrap handler that simply
# returns lets the interrupted program carry on, so without it an interrupt
# would only abort the current transfer and move on to the next one.
sub exit_handler {
    print "exiting\n";
    for my $filepath (@rpms_downloaded) {
        validate_rpm_file($filepath);
    }
    # Non-zero status: the run was cut short by a signal.
    exit 1;
}
# Check a downloaded RPM with "rpm --quiet -Kp" and delete it if the check
# fails (e.g. an incomplete download).
#
# Parameters:
#   $filepath - path of the file to check; anything not ending in
#               "${ARCH}.rpm" is ignored, so no other file is ever deleted.
# Returns nothing.
# Dies if rpm cannot be started: without the tool nothing can be judged, and
# deleting the download in that situation would throw away good files.
sub validate_rpm_file {
    my ($filepath) = @_;
    return unless defined $filepath && $filepath =~ m/${ARCH}\.rpm$/;
    print "validating $filepath ...\n";

    # List-form pipe open runs rpm without a shell, so a path containing
    # spaces or shell metacharacters is passed through as one argument.
    # The '|-' mode leaves rpm's own output attached to this script's STDOUT.
    open( my $rpm, '|-', 'rpm', '--quiet', '-Kp', $filepath )
        or die "Cannot run rpm for ${filepath}: $!\n";

    # close() waits for rpm to finish and is true only on exit status 0.
    return if close($rpm);

    print "not valid\n";
    if ( -e $filepath ) {
        unlink($filepath)
            or warn "WARNING: could not remove ${filepath}: $!\n";
    }
    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment