Created
April 16, 2021 15:04
-
-
Save NbtKmy/296238b857f65f6c1447b6e7263bc5da to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import urllib.request | |
import json | |
import sys | |
from PIL import Image | |
import requests | |
from io import BytesIO | |
import cv2 | |
import numpy as np | |
import img2pdf | |
import argparse | |
# usage - see help: $ python iiifDL.py -h
# example: $ python iiifDL.py http://www2.dhii.jp/nijl/NIJL0018/099-0014/manifest.json -s 5 -e 10
# requirements: Python 3, Pillow, requests, OpenCV (cv2), numpy and img2pdf
# note: PIL save() with .jpg uses a default quality of 75.
# Code written partly by Kiyonori Nagasaki
def getImgUrls(x):
    """Fetch a IIIF Presentation manifest and collect its image URLs.

    Args:
        x: URL of the manifest JSON (anything ``urllib.request.urlopen``
            accepts).

    Returns:
        A tuple ``(arImageUrls, ver3)``. When the manifest declares the
        Presentation API 3 context, ``ver3`` is True and each entry is the
        ``id`` of an annotation body. Otherwise ``ver3`` is False and each
        entry is the ``@id`` of the image's service, to which the caller
        still has to append an image request path.

    Raises:
        KeyError: if the manifest lacks the keys expected for its version.
    """
    # The context manager closes the response even if decoding/parsing fails.
    with urllib.request.urlopen(x) as readObj:
        data = json.loads(readObj.read().decode('utf-8'))

    iiif3 = 'http://iiif.io/api/presentation/3/context.json'
    # '@context' may be a single string or a list of context URIs (used when
    # extension contexts are declared), so accept both forms.
    context = data.get('@context')
    if isinstance(context, (list, tuple)):
        ver3 = iiif3 in context
    else:
        ver3 = context == iiif3

    # get Image Urls as a list
    arImageUrls = []
    if ver3:
        # Three nested 'items' levels; the image URL is the body id of the
        # innermost entries.
        for canvas in data['items']:
            for page in canvas['items']:
                for annotation in page['items']:
                    arImageUrls.append(annotation['body']['id'])
    else:
        for seq in data['sequences']:
            for canvas in seq['canvases']:
                for image in canvas['images']:
                    arImageUrls.append(image['resource']['service']['@id'])
    return arImageUrls, ver3
if __name__ == "__main__":
    # Script entry point: download the requested page range of a IIIF
    # manifest and store every page as a greyscale PNG under ./img/.

    # Local import: only the entry point needs the filesystem helper.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('url', help='the url of iiif manifest')
    parser.add_argument('-s', '--startPage', type=int, default=1, help='start page (default = 1)')
    parser.add_argument('-e', '--endPage', type=int, default=50, help='end page (default = 50)')
    args = parser.parse_args()
    url = args.url
    startPg = args.startPage
    endPg = args.endPage
    # Pages are 1-based; 0 or a negative value would index from the end of
    # the URL list instead of failing.
    if startPg < 1:
        parser.error('start page must be 1 or greater')

    urlList, ver3 = getImgUrls(url)
    length = len(urlList)
    # Zero-pad file names one digit wider than the page count.
    tLen = len(str(length)) + 1

    # cv2.imwrite does not create missing directories, so make sure the
    # output folder exists before the first write.
    outDir = './img/'
    os.makedirs(outDir, exist_ok=True)

    print('downloading...')
    for i in range(startPg - 1, endPg):
        # '>=' also covers a start page that is already past the last page.
        if i >= length:
            print(str(length) + ' is the last page')
            break
        localFilename = outDir + str(i + 1).zfill(tLen) + '.png'
        imageUrl = urlList[i]
        if not ver3:
            # Non-v3 entries are image service ids; append the request path
            # for the full-size default JPEG.
            imageUrl = imageUrl + '/full/full/0/default.jpg'
        response = requests.get(imageUrl)
        # Fail early on HTTP errors instead of decoding an error page.
        response.raise_for_status()
        # Normalise to 3-channel RGB so greyscale, palette and RGBA sources
        # are accepted by cvtColor as well.
        imgObj = Image.open(BytesIO(response.content)).convert('RGB')
        # Pillow delivers channels in RGB order, hence RGB2GRAY (BGR2GRAY
        # would swap the red and blue luminance weights).
        grau_img = cv2.cvtColor(np.float32(imgObj), cv2.COLOR_RGB2GRAY)
        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(localFilename, grau_img):
            sys.exit('could not write ' + localFilename)
        print(localFilename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment