Skip to content

Instantly share code, notes, and snippets.

@onlurking
Last active October 20, 2019 02:33
Show Gist options
  • Save onlurking/1f66e4d58074d64c8268 to your computer and use it in GitHub Desktop.
Save onlurking/1f66e4d58074d64c8268 to your computer and use it in GitHub Desktop.
A SlideShare presentation downloader written with Python 3 and Insomnia; it now only requires img2pdf and BeautifulSoup.
import argparse
import urllib.request
import os
import img2pdf
from os import walk
from os.path import join
from bs4 import BeautifulSoup
# Directory portion of this script's path; convert_pdf joins it with the image
# folder name to locate the downloaded slide images.
work_dir = os.path.dirname(__file__)
def download_images(url):
    """Download every slide image of a presentation page and build the PDF.

    The page at ``url`` is fetched and every ``<img class="slide_image">``
    element is inspected. The address in its ``data-full`` attribute (query
    string stripped) is downloaded with ``wget`` into the ``pdf_images``
    folder, after which ``convert_pdf`` is called on that folder.

    Args:
        url: Address of the presentation page to scrape.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        FileNotFoundError: If the ``wget`` executable is not installed.
    """
    # Local import so this function does not depend on the file-level imports.
    import subprocess

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')
    # Fixed folder name; the __main__ cleanup removes this same folder.
    title = 'pdf_images'
    images = soup.findAll('img', {'class': 'slide_image'})

    for image in images:
        full_url = image.get('data-full')
        if not full_url:
            # Slide without a full-resolution address: nothing to download.
            continue
        image_url = full_url.split('?')[0]
        # Argument list instead of a shell string: the address comes from
        # remote HTML and must never be interpreted by a shell. The "--"
        # stops wget from treating a hostile address as an option.
        subprocess.run(['wget', '--quiet', '-P', title, '--', image_url])

    convert_pdf(title)
def convert_pdf(url):
    """Combine the images of a folder into ``presentation.pdf``.

    Only the files directly inside the folder are used (sub-folders are not
    descended into). They are ordered naturally, so ``slide-2`` comes before
    ``slide-10``, and written as one PDF into the current working directory.

    Args:
        url: Name of the image folder, resolved relative to ``work_dir``.
            Despite its name this is a directory name, not a web address.
    """
    # Local import so this function does not depend on the file-level imports.
    import re

    def _natural_key(name):
        """Sort key splitting ``name`` into text and integer chunks."""
        # re.split with a capturing group alternates text / digits, so the
        # odd positions are always the numeric chunks.
        return [int(chunk) if index % 2 else chunk
                for index, chunk in enumerate(re.split(r'([0-9]+)', name))]

    image_dir = join(work_dir, url)
    # Take only the first walk() entry: the files at the top of the folder.
    _, _, filenames = next(walk(image_dir), (image_dir, [], []))
    if not filenames:
        print("No images found in %s" % image_dir)
        return

    # Build the paths from the same directory that was listed, so listing
    # and opening can never point at two different locations.
    f = [join(image_dir, x) for x in sorted(filenames, key=_natural_key)]
    print("Making pdf")
    pdf_bytes = img2pdf.convert(f, dpi=300, x=None, y=None)
    # Context manager guarantees the file is closed even if the write fails.
    with open('presentation.pdf', 'wb') as doc:
        doc.write(pdf_bytes)
if __name__ == "__main__":
    # Script entry point: parse the presentation address, download the slides,
    # build the PDF, then remove the temporary image folder.
    import shutil

    parser = argparse.ArgumentParser()
    parser.add_argument("url", type=str,
                        help="download an slideshare presentation given the url")
    args = parser.parse_args()
    try:
        download_images(args.url)
    finally:
        # Portable replacement for "rm -r"; runs even when the download or
        # conversion fails, and stays quiet if the folder was never created.
        shutil.rmtree('pdf_images', ignore_errors=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment