Created
April 17, 2021 14:02
-
-
Save NbtKmy/c8132132d593a195b0174b4e14035636 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import urllib.request | |
import json | |
import sys | |
from PIL import Image | |
import requests | |
from io import BytesIO | |
import cv2 | |
import numpy as np | |
import img2pdf | |
import argparse | |
# usage - see help: $python iiifDL.py -h | |
# example $python iiifDL.py http://www2.dhii.jp/nijl/NIJL0018/099-0014/manifest.json -s 5 -e 10 | |
# requirements: Python 3, Pillow, requests, opencv-python (cv2), numpy & img2pdf
# PIL save() with .jpg - default quality is '75'. | |
# Code written partly by Kiyonori Nagasaki | |
def getImgUrls(x):
    """Fetch an IIIF manifest and collect the image URLs it lists.

    Args:
        x: URL of the IIIF Presentation manifest (a JSON document).

    Returns:
        A tuple ``(arImageUrls, ver3)``:

        * ``arImageUrls`` -- list of URLs in manifest order. For a version 3
          manifest each entry is the ``body.id`` of an annotation; otherwise
          each entry is the ``resource.service.@id`` of an image, to which
          the caller still has to append the IIIF Image API path.
        * ``ver3`` -- ``True`` when the manifest declares the Presentation
          API 3 context, ``False`` otherwise.

    Raises:
        KeyError: if the manifest lacks the keys expected for its version.
    """
    iiif3 = 'http://iiif.io/api/presentation/3/context.json'

    # Use a context manager so the HTTP response is always closed.
    with urllib.request.urlopen(x) as readObj:
        data = json.loads(readObj.read().decode('utf-8'))

    # '@context' may be a single string or a list of context URIs; a plain
    # equality test would misclassify the list form as "not version 3".
    context = data.get('@context')
    if isinstance(context, list):
        ver3 = iiif3 in context
    else:
        ver3 = context == iiif3

    # get Image Urls as a list
    arImageUrls = []
    if ver3:
        for canvas in data['items']:
            for page in canvas['items']:
                for annotation in page['items']:
                    arImageUrls.append(annotation['body']['id'])
    else:
        for seq in data['sequences']:
            for canvas in seq['canvases']:
                for image in canvas['images']:
                    arImageUrls.append(image['resource']['service']['@id'])
    return arImageUrls, ver3
def _save_gray_png(image_url, local_filename):
    """Download one image and write it to *local_filename* as a grayscale PNG.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        OSError: if OpenCV could not write the output file.
    """
    response = requests.get(image_url, timeout=60)
    # Fail loudly instead of handing an HTML error page to the image decoder.
    response.raise_for_status()

    imgObj = Image.open(BytesIO(response.content))
    # Pillow decodes to RGB channel order (not OpenCV's BGR); normalising to
    # 'RGB' also makes grayscale, palette and RGBA sources safe to convert.
    rgb = np.asarray(imgObj.convert('RGB'))
    grau_img = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(local_filename, grau_img):
        raise OSError('could not write ' + local_filename)


def main():
    """Parse the command line and download the requested manifest pages.

    Pages ``--startPage`` .. ``--endPage`` (1-based, inclusive) are saved as
    zero-padded grayscale PNG files under ``./img/``.
    """
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('url', help='the url of iiif manifest')
    parser.add_argument('-s', '--startPage', type=int, default=1, help='start page (default = 1)')
    parser.add_argument('-e', '--endPage', type=int, default=50, help='end page (default = 50)')
    args = parser.parse_args()

    if args.startPage < 1:
        # A start page below 1 would silently index from the end of the list.
        parser.error('start page must be 1 or greater')

    urlList, ver3 = getImgUrls(args.url)
    length = len(urlList)
    tLen = len(str(length)) + 1  # width of the zero-padded page number

    # The output directory must exist before OpenCV can write into it.
    os.makedirs('./img', exist_ok=True)

    print('downloading...')
    for pageNo in range(args.startPage, args.endPage + 1):
        # '>=' also covers a start page that lies beyond the last page.
        if pageNo - 1 >= length:
            print(str(length) + ' is the last page')
            break

        localFilename = './img/' + str(pageNo).zfill(tLen) + '.png'
        imageUrl = urlList[pageNo - 1]
        if not ver3:
            # Version 2 manifests yield the image service id; append the
            # IIIF Image API request path for the full-size image.
            imageUrl = imageUrl.rstrip('/') + '/full/full/0/default.jpg'

        _save_gray_png(imageUrl, localFilename)
        print(localFilename)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment