Skip to content

Instantly share code, notes, and snippets.

@sarabveer
Last active December 24, 2020 20:08
Show Gist options
  • Save sarabveer/42cf3558114b0f1223bfa11ec6dea1c1 to your computer and use it in GitHub Desktop.
Save sarabveer/42cf3558114b0f1223bfa11ec6dea1c1 to your computer and use it in GitHub Desktop.
Punjab Digital Library Image Downloader
'''
Punjab Digital Library Img Fetcher
By Sarabveer Singh (me@sarabveer.me)
'''
import os
import urllib.request as urllib
def build_page_url(book_id, page):
    """Return the PDL image URL for one page of a manuscript.

    Args:
        book_id: Manuscript ID as it appears in the ``ID=`` query parameter
            of a panjabdigilib.org ``displayPage.jsp`` URL (e.g. ``8790``).
        page: Page number to fetch.

    Returns:
        The URL string passed to the downloader for that page.
    """
    return (
        "http://www.panjabdigilib.org/images?ID=" + str(book_id)
        + "&page=" + str(page)
        + "&CategoryID=1&pagetype=1&Searched=W3GX"
    )


def download_pages(book_id, first_page, last_page, target_dir):
    """Download an inclusive range of pages into ``target_dir``.

    Each page is saved as ``<page>.jpg``. A page that fails to download is
    reported and skipped so the remaining pages are still attempted.

    Args:
        book_id: Manuscript ID used to build each page URL.
        first_page: First page number to download (inclusive).
        last_page: Last page number to download (inclusive).
        target_dir: Existing directory the image files are written to.

    Returns:
        List of page numbers that could not be downloaded, in order.
    """
    failed_pages = []
    for page in range(first_page, last_page + 1):
        destination = os.path.join(target_dir, str(page) + ".jpg")
        try:
            urllib.urlretrieve(build_page_url(book_id, page), destination)
        except OSError:
            # URLError/HTTPError/ContentTooShortError all derive from OSError,
            # as do local file errors. Catching only OSError lets Ctrl-C
            # (KeyboardInterrupt) abort the run instead of being counted as
            # a failed page.
            print('Error Downloading Page:', page)
            failed_pages.append(page)
        else:
            print('Downloaded Page:', page)
    return failed_pages


def main():
    """Prompt for a book ID and page range, then download those pages."""
    print('Punjab Digital Library Img Fetcher')
    print('----------')
    # ID of Manuscript
    # Example: http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=8790&page=1&CategoryID=1&Searched=
    # The ID here would be: 8790
    book_id = input('Enter PDL Book ID Number: ').strip()
    # First page number is usually 1.
    first_page_text = input('Enter First Page Number: ')
    last_page_text = input('Enter Last Page Number: ')
    # Validate the page numbers before touching the filesystem so that bad
    # input does not leave an empty directory behind.
    try:
        first_page = int(first_page_text)
        last_page = int(last_page_text)
    except ValueError:
        raise SystemExit('Page numbers must be whole numbers.') from None

    # Create the output directory; exist_ok avoids a check-then-create race.
    target_dir = 'PDL-' + book_id
    os.makedirs(target_dir, exist_ok=True)

    print('Start downloading:', target_dir, '...')
    failed_pages = download_pages(book_id, first_page, last_page, target_dir)
    print('Finished downloading:', target_dir)
    if failed_pages:
        print('Unable to download following pages:',
              ' '.join(str(page) for page in failed_pages))
    # Keeps the console window open when the script is launched by double-click.
    input("Press enter to exit...")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment