Skip to content

Instantly share code, notes, and snippets.

@gnanet
Forked from giudinvx/slideshare-downloader.sh
Last active December 24, 2015 06:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gnanet/6757236 to your computer and use it in GitHub Desktop.
Save gnanet/6757236 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Author: Andrea Lazzarotto
# http://andrealazzarotto.com
# andrea.lazzarotto@gmail.com
# Slideshare Downloader
# This script takes a slideshare presentation URL as an argument and
# carves all the slides in flash format, then they are converted to
# PNG images and finally merged as a PDF
# License:
# Copyright 2010-2011 Andrea Lazzarotto
# This script is licensed under the Gnu General Public License v3.0.
# You can obtain a copy of this license here: http://www.gnu.org/licenses/gpl.html
# Usage:
# slideshare-downloader.sh URL [SIZE]
#-----------------------------------------------
# Modified 2013-09-30 by gnanet
# Email gna[at]gnanet[dot]net
#-----------------------------------------------
validate_input() {
  # Performs a very basic check of the command-line arguments and publishes
  # the normalised values as globals.
  # Arguments:
  #   $1 - presentation URL; a trailing "#fragment" and everything after the
  #        fifth "/"-separated field are dropped
  #   $2 - optional size; accepted only if it consists solely of digits
  # Globals written: URL, DOMAIN, CORRECT, SIZE
  # Exits with status 1 if the host part of the URL is not www.slideshare.net.
  URL=$(printf '%s\n' "$1" | cut -d "#" -f 1 | cut -d "/" -f 1-5)
  DOMAIN=$(printf '%s\n' "$URL" | cut -d "/" -f 3)
  CORRECT='www.slideshare.net'
  if [[ "$DOMAIN" != "$CORRECT" ]]; then
    echo "Provided URL is not valid." >&2
    exit 1
  fi
  # The regex is anchored to the whole string, so a value with embedded
  # newlines or an empty value is rejected and falls back to the default.
  if [[ "$2" =~ ^[0-9]+$ ]]; then
    SIZE=$2
  else
    SIZE=2000
    echo "Size not defined or invalid... defaulting to 2000."
  fi
}
check_dependencies() {
  # Verifies that every command the script relies on can be resolved.
  # Prints one "Error: <name> not found." line per missing command and exits
  # with status 1 if at least one is missing.
  # Globals written: DEP, ERROR
  local tool
  DEP="wget curl awk sed seq dump-gnash convert"
  ERROR="0"
  # $DEP is deliberately unquoted: it is a space-separated list of names.
  for tool in $DEP; do
    # `command -v` is a shell builtin, so the check does not itself depend on
    # an external `which` binary being installed.
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "Error: $tool not found." >&2
      ERROR="1"
    fi
  done
  if [[ "$ERROR" == "1" ]]; then
    echo "You need to install some packages." >&2
    echo "Remember: this script requires Imagemagick and Gnash." >&2
    exit 1
  fi
}
build_params() {
  # Gathers the information required to download a presentation.
  # Arguments:
  #   $1 - presentation URL (the fifth "/"-separated field is used as the
  #        short document name)
  # Globals read:    SIZE (only echoed)
  # Globals written: DOCSHORT, INFOPAGE, DOCVERSION, DOCID, SLIDES
  # Exits with status 1 if the document id or the slide count cannot be
  # extracted from the fetched page.
  local script_page
  DOCSHORT=$(printf '%s\n' "$1" | cut -d "/" -f 5)
  echo "Download of $DOCSHORT started."
  echo "Fetching information..."
  INFOPAGE=$(wget -q -O - "$1")
  # A single curl fetch feeds both extractions below, so the version and the
  # document id always come from the same response.
  script_page=$(curl -s "$1")
  DOCVERSION=$(printf '%s\n' "$script_page" \
    | grep -E -o '"version_no":".*","start' \
    | awk -F'"' '{ print $4 }')
  # Keep what follows "doc=" and cut everything from the first "]" onwards.
  DOCID=$(printf '%s\n' "$script_page" \
    | grep -E -o 'doc=.*","embed_size' \
    | sed -e "s/doc=//g" -e "s/\].*//g")
  # Only the trailing run of [a-z0-9-] characters is kept as the id.
  if [[ "$DOCID" =~ ([a-z0-9-]+)$ ]]; then
    DOCID=${BASH_REMATCH[0]}
  else
    echo "Error: could not determine the document id (got: '$DOCID')." >&2
    exit 1
  fi
  SLIDES=$(printf '%s\n' "$INFOPAGE" | grep "totalSlides" | head -n 1 \
    | sed -e "s/.*totalSlides//g" | cut -d ":" -f 2 | cut -d "," -f 1)
  # Drop stray whitespace, then insist on a plain number: the value is later
  # handed to seq, where an empty string would silently yield one slide.
  SLIDES=${SLIDES//[[:space:]]/}
  if [[ ! "$SLIDES" =~ ^[0-9]+$ ]]; then
    echo "Error: could not determine the number of slides (got: '$SLIDES')." >&2
    exit 1
  fi
  echo "Slides: $SLIDES"
  echo "Size: $SIZE"
  echo "Docversion: $DOCVERSION"
}
create_env() {
  # Finds a suitable name for the destination directory and creates it.
  # Tries "$DOCSHORT" first; if that path already exists it tries
  # "$DOCSHORT.0", "$DOCSHORT.1", ... until an unused name is found.
  # Globals read:    DOCSHORT
  # Globals written: DIR
  # Exits with status 1 if the directory cannot be created.
  local base=$DOCSHORT
  local suffix=0
  DIR=$base
  while [[ -e "$DIR" ]]; do
    DIR="$base.$suffix"
    suffix=$(( suffix + 1 ))
  done
  # Every later step writes into DIR, so stop right away if it is unusable
  # (for example when DOCSHORT is empty).
  if ! mkdir -- "$DIR"; then
    echo "Error: could not create directory '$DIR'." >&2
    exit 1
  fi
}
fetch_slides() {
  # Downloads slides 1..SLIDES as SWF files into DIR.
  # Globals read: SLIDES, DOCID, DOCVERSION, DIR
  # Output files: "$DIR/slide-<n>.swf"
  # A failed download is reported on stderr; the loop carries on with the
  # remaining slides.
  local n url target
  for n in $(seq 1 "$SLIDES"); do
    echo "Downloading slide $n"
    url="http://cdn.slidesharecdn.com/${DOCID}-slide-${n}.swf?ver=${DOCVERSION}"
    target="$DIR/slide-${n}.swf"
    if ! wget -q -O "$target" "$url"; then
      echo "Warning: download of slide $n failed." >&2
    fi
  done
  echo "All slides downloaded."
}
convert_slides() {
  # Renders "$DIR/slide-<n>.swf" to "$DIR/slide-<n>.png" with dump-gnash for
  # every n in 1..SLIDES.
  # Globals read: SLIDES, DIR
  # A failed conversion is reported on stderr; the loop carries on with the
  # remaining slides.
  local n
  for n in $(seq 1 "$SLIDES"); do
    echo "Converting slide $n"
    # Paths are quoted so a directory name containing spaces or glob
    # characters is passed to dump-gnash as a single argument.
    if ! dump-gnash --screenshot last --screenshot-file "$DIR/slide-$n.png" -1 -r1 "$DIR/slide-$n.swf"; then
      echo "Warning: conversion of slide $n failed." >&2
    fi
  done
  echo "All slides converted."
}
build_pdf() {
  # Merges every PNG in DIR, in version-sorted order (slide-2 before
  # slide-10), into "$DIR/$DOCSHORT.pdf" using convert.
  # Globals read:    DIR, DOCSHORT
  # Globals written: IMAGES (array of the PNG paths handed to convert)
  # Exits with status 1 if there are no PNG files or convert fails.
  local image
  IMAGES=()
  # The shell expands the glob (no ls parsing); sort -V restores numeric
  # slide order. An array keeps each path as one word.
  while IFS= read -r image; do
    IMAGES+=("$image")
  done < <(printf '%s\n' "$DIR"/*.png | sort -V)
  # An unmatched glob is left as the literal pattern, which does not exist.
  if [[ ! -e "${IMAGES[0]}" ]]; then
    echo "Error: no PNG images found in '$DIR'." >&2
    exit 1
  fi
  echo "Generating PDF..."
  if ! convert "${IMAGES[@]}" -compress jpeg "$DIR/$DOCSHORT.pdf"; then
    echo "Error: PDF generation failed." >&2
    exit 1
  fi
  echo "The PDF has been generated."
  echo "Find your presentation in: \"$(pwd)/$DIR/$DOCSHORT.pdf\""
}
clean() {
  # Removes the intermediate slide-*.swf and slide-*.png files from DIR.
  # Globals read: DIR
  # "${DIR:?}" aborts instead of expanding to "/slide-*" when DIR is empty or
  # unset; quoting keeps a DIR containing spaces from being split into
  # several unrelated rm targets.
  rm -rf -- "${DIR:?DIR is not set}"/slide-*.swf
  rm -rf -- "${DIR:?DIR is not set}"/slide-*.png
}
# Entry point: validate the arguments, gather the presentation metadata,
# download and convert the slides, build the PDF, then remove the
# intermediate files.
# Arguments are quoted so a URL containing "?", "*" or spaces reaches the
# functions unchanged instead of being word-split or glob-expanded.
validate_input "$1" "$2"
check_dependencies
build_params "$URL"
create_env
fetch_slides
convert_slides
build_pdf
clean
@gnanet
Copy link
Author

gnanet commented Sep 29, 2013

"AJAX Ready" = as of today it no longer produces 0-byte SWF files.
It extracts the info from the JavaScript content with curl + awk + sed, and depends on Gnash.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment