Skip to content

Instantly share code, notes, and snippets.

@noureddin
Last active April 22, 2018 06:29
Show Gist options
  • Save noureddin/adb2b7f5d8fcb53a651d1ccc2304ae6f to your computer and use it in GitHub Desktop.
Save noureddin/adb2b7f5d8fcb53a651d1ccc2304ae6f to your computer and use it in GitHub Desktop.
Download presentations from slideshare.net
#!/usr/bin/env perl
use strict; use warnings;
# download presentations from slideshare.net
# Print the usage text to the currently selected output handle.
# The program name shown on the USAGE line is the basename of $0.
# Takes no arguments; the return value is whatever printf returns.
sub help {
    use File::Basename 'basename';
    my $program_name = basename($0);
    # printf format: the single %s is replaced by the program name
    my $usage_format = <<'END_OF_USAGE';
slideshare-dl - download presentations from slideshare.net
USAGE: %s -dl url_of_slides... [-r small|normal|full] [-q|-v]
-r for resolution;
it can be `small`, `normal` (the default), or `full`.
-q for quiet (the default);
it prints only the output filename (one line per url).
-v for verbose;
it prints the output filename line + a line per slide.
Version: 2018.04.22, noureddin@GitHub. License: Creative Commons Zero
END_OF_USAGE
    printf($usage_format, $program_name);
}
# INSTALL:
# 1. download this file somewhere on your computer.
# 2. make it executable (e.g. with `chmod +x`).
# 3. run it from the command line with a url.
# see also https://github.com/danog/simple-slideshare-downloader
# for a GPLv3 Bash version from a different author.
# This script is written in a (hopefully) platform-agnostic way, but it
# assumes you have GNU `wget` and ImageMagick `convert` in your PATH;
# otherwise, change the following two variables to the right commands.
# NOTE(review): the original note asked for wget >= 1.6, but verbose mode
# passes `--show-progress`, which (as far as I know) first appeared in
# wget 1.16 -- "1.6" looks like a typo for "1.16"; confirm before relying on it.
my $wget = 'wget';     # command used to fetch the page and the slide images
my $conv = 'convert';  # command used to combine the slide images into a PDF
use File::Path qw(make_path remove_tree);
use File::Spec::Functions qw(catdir catfile curdir rel2abs splitpath tmpdir);
# Return the part of a URL after its last `/` (query string included,
# because that is the file name wget saves the download under).
#   $url - the address to shorten.
# A string without any `/` is returned unchanged; previously the failed
# match leaked whatever $1 held from an earlier, unrelated match.
sub basename_url {
    my ($url) = @_;
    return $url =~ m{.*/(.*)} ? $1 : $url;
}
# basic setup
# Remember where the user started the script: the finished PDF is written
# there, while all downloading happens inside a scratch directory.
my $user_curdir = rel2abs(curdir());
my $tmpdir = catdir(tmpdir(), 'slideshare-dl');
make_path($tmpdir);
# Stop if the scratch directory cannot be entered: slide images are
# downloaded into, and later deleted from, the current directory, so
# carrying on would do that in the user's own directory.
chdir($tmpdir)
    or die "slideshare-dl: cannot change directory to '$tmpdir': $!\n";
# cookie jar shared by all wget calls (relative to the scratch directory)
my $cookies_file = 'slideshare-dl.cookies-file';
# -c resumes partial downloads, -q silences wget's normal output
my $wget_options = "-cq --load-cookies '$cookies_file' --save-cookies '$cookies_file'";
# parsing args
# Recognised options: -h / --help / -? (help), -q (quiet), -v (verbose),
# and -r SIZE or -rSIZE (resolution; may be glued to one other letter,
# e.g. `-vr full`). Any other word starting with `-` (such as `-dl`) is
# ignored; every remaining word is treated as a presentation URL.
my %is_known_size = map { $_ => 1 } qw(small normal full);
my $size = 'normal';
my $verbose = 0;
my $size_pending = 0;   # true right after a bare `-r`: the next word is the size
my @given_urls;
for my $arg (@ARGV) {
    if ($size_pending) {
        $size = $arg;
        $size_pending = 0;
        next;
    }
    if ($arg !~ /^-/) {
        push @given_urls, $arg;
        next;
    }
    if ($arg =~ /^-?-[h?]/) {
        # asking for help must not go on to download anything
        help();
        exit 0;
    }
    $verbose = 0 if $arg =~ /^-q/;
    $verbose = 1 if $arg =~ /^-v/;
    if ($arg =~ /^-r(.+)/ || $arg =~ /^-.r(.+)/) {
        $size = $1;
    }
    elsif ($arg =~ /^-.?r$/) {
        $size_pending = 1;
    }
}
die "slideshare-dl: option -r needs a value (small, normal, or full)\n"
    if $size_pending;
die "slideshare-dl: unknown resolution '$size' (use small, normal, or full)\n"
    unless $is_known_size{$size};
if (!@given_urls) {
    # nothing to do: show the usage instead of silently exiting
    help();
    exit 1;
}
$wget_options .= ' --show-progress'
    if $verbose;
get_slides($_) for @given_urls;
# Download every slide image of one presentation and combine them into a
# single PDF.
#   $page_url - address of the presentation page.
# Uses the file-level settings $wget, $wget_options, $conv, $size and
# $user_curdir, and downloads into the current directory.
# The PDF is written to $user_curdir as "<last url segment>-<size>.pdf".
# Returns 1 on success; on failure it warns and returns nothing.
sub get_slides {
    my ($page_url) = @_;

    # wrap a string in single quotes for the shell, escaping embedded quotes
    my $shell_quote = sub {
        my ($text) = @_;
        $text =~ s/'/'\\''/g;
        return "'$text'";
    };

    my $quoted_page_url = $shell_quote->($page_url);
    my $page = `$wget $wget_options -O- $quoted_page_url`;
    my $fetch_status = $? == -1 ? 'could not be run' : $? >> 8;
    $page = '' unless defined $page;

    # getting the urls of the images of the slides
    my @urls_unquoted;
    while ($page =~ /data-\Q$size\E="([^"]+)"/g) {
        push @urls_unquoted, $1;
    }
    if (!@urls_unquoted) {
        warn "slideshare-dl: no '$size' slide images found at $page_url"
            . " (wget exit status: $fetch_status)\n";
        return;
    }

    my @urls_quoted   = map { $shell_quote->($_) } @urls_unquoted;
    my @imgs_unquoted = map { basename_url($_) } @urls_unquoted;
    my @imgs_quoted   = map { $shell_quote->($_) } @imgs_unquoted;

    # Remove leftovers with the same names first; unlink needs the real file
    # names, not the shell-quoted ones. Otherwise wget's resume option (-c)
    # could keep or extend a stale image from an earlier run.
    unlink @imgs_unquoted;

    my $output_file_base = basename_url($page_url) . '-' . $size . '.pdf';
    my $output_file = catfile($user_curdir, $output_file_base);
    my $quoted_output_file = $shell_quote->($output_file);

    `$wget $wget_options @urls_quoted`;
    my @missing = grep { !-e $_ } @imgs_unquoted;
    if (@missing) {
        warn "slideshare-dl: failed to download " . scalar(@missing)
            . " of " . scalar(@imgs_unquoted) . " slides of $page_url\n";
        unlink @imgs_unquoted;
        return;
    }

    `$conv @imgs_quoted $quoted_output_file`;
    my $convert_failed = $? != 0;
    unlink @imgs_unquoted;
    if ($convert_failed) {
        warn "slideshare-dl: '$conv' could not create $output_file\n";
        return;
    }

    printf "Saved to \"%s\"\n", $output_file_base;
    return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment