Last active
December 24, 2020 20:08
-
-
Save sarabveer/42cf3558114b0f1223bfa11ec6dea1c1 to your computer and use it in GitHub Desktop.
Punjab Digital Library Image Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Punjab Digital Library Img Fetcher
By Sarabveer Singh (me@sarabveer.me)

Interactive script: asks for a manuscript ID and an inclusive page range,
then saves every page image as PDL-<ID>/<page>.jpg in the current directory.
'''
import os
import urllib.request as urllib

# Template of the per-page image endpoint; only the ID and page number vary.
PAGE_URL_TEMPLATE = (
    'http://www.panjabdigilib.org/images?ID={book_id}&page={page}'
    '&CategoryID=1&pagetype=1&Searched=W3GX'
)


def build_page_url(book_id, page):
    '''Return the image URL for page number `page` of manuscript `book_id`.'''
    return PAGE_URL_TEMPLATE.format(book_id=book_id, page=page)


def build_page_path(out_dir, page):
    '''Return the local file path where page number `page` is stored.'''
    return os.path.join(out_dir, '{}.jpg'.format(page))


def main():
    '''Prompt for the manuscript ID and page range, then download the pages.

    A page that fails to download is reported and skipped; the numbers of
    all failed pages are listed once the run is over.

    Raises:
        ValueError: if a page number entered at the prompt is not an integer.
    '''
    print('Punjab Digital Library Img Fetcher')
    print('----------')

    # ID of Manuscript
    # Example: http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=8790&page=1&CategoryID=1&Searched=
    # The ID here would be: 8790
    book_id = input('Enter PDL Book ID Number: ').strip()

    # First Page Number
    # Usually 1
    first_page = int(input('Enter First Page Number: '))

    # Last Page Number
    last_page = int(input('Enter Last Page Number: '))

    # Create Directory (only after the page numbers parsed, so invalid input
    # does not leave an empty directory behind).
    out_dir = 'PDL-' + book_id
    os.makedirs(out_dir, exist_ok=True)

    # Start Download
    failed_pages = []
    print('Start downloading:', out_dir, '...')
    for page in range(first_page, last_page + 1):
        try:
            urllib.urlretrieve(build_page_url(book_id, page),
                               build_page_path(out_dir, page))
        except Exception as exc:
            # Best effort: one bad page must not abort the whole run.
            # Exception (not a bare except) lets Ctrl-C still stop the script.
            print('Error Downloading Page:', page, '-', exc)
            failed_pages.append(str(page))
        else:
            print('Downloaded Page:', page)

    print('Finished downloading:', out_dir)
    if failed_pages:
        print('Unable to download following pages:', ' '.join(failed_pages))
    input("Press enter to exit...")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment