Skip to content

Instantly share code, notes, and snippets.

@NbtKmy
Created April 17, 2021 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NbtKmy/c8132132d593a195b0174b4e14035636 to your computer and use it in GitHub Desktop.
Save NbtKmy/c8132132d593a195b0174b4e14035636 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import urllib.request
import json
import sys
from PIL import Image
import requests
from io import BytesIO
import cv2
import numpy as np
import img2pdf
import argparse
# usage - see help: $python iiifDL.py -h
# example $python iiifDL.py http://www2.dhii.jp/nijl/NIJL0018/099-0014/manifest.json -s 5 -e 10
# requirements: Python 3, Pillow, requests, OpenCV (cv2), numpy & img2pdf
# PIL save() with .jpg - default quality is '75'.
# Code written partly by Kiyonori Nagasaki
def getImgUrls(x):
    """Fetch a IIIF Presentation manifest and collect its image URLs.

    Args:
        x: URL of the manifest JSON document.

    Returns:
        A tuple ``(arImageUrls, ver3)``. When the manifest declares the
        Presentation 3 context, ``ver3`` is True and every entry is the
        ``id`` of an annotation body. Otherwise ``ver3`` is False and every
        entry is the ``@id`` of the image resource's service.

    Raises:
        KeyError: if the manifest lacks the keys expected for its version.
    """
    # Release the connection as soon as the payload has been read.
    with urllib.request.urlopen(x) as readObj:
        data = json.loads(readObj.read().decode('utf-8'))

    iiif3 = 'http://iiif.io/api/presentation/3/context.json'
    # '@context' may be a single URI or a list of URIs, so test membership
    # instead of comparing the whole value for equality.
    context = data.get('@context')
    if isinstance(context, str):
        context = [context]
    ver3 = iiif3 in (context or [])

    # get Image Urls as a list
    arImageUrls = []
    if ver3:
        # manifest -> canvases -> annotation pages -> annotations
        for canvas in data['items']:
            for annoPage in canvas['items']:
                for annotation in annoPage['items']:
                    arImageUrls.append(annotation['body']['id'])
    else:
        for seq in data['sequences']:
            for canvas in seq['canvases']:
                for image in canvas['images']:
                    arImageUrls.append(image['resource']['service']['@id'])
    return arImageUrls, ver3
def main():
    """Download a page range of a IIIF manifest as grayscale PNG files.

    Reads the manifest URL and the optional 1-based start/end pages from the
    command line, downloads each page image, converts it to grayscale and
    writes it to ``./img/<page number>.png``. Stops early, with a message,
    once the last page of the manifest has been passed.

    Raises:
        requests.HTTPError: if an image request returns an error status.
        OSError: if an image file cannot be written.
    """
    # Local import: the file's import header does not provide os.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('url', help='the url of iiif manifest')
    parser.add_argument('-s', '--startPage', type=int, default=1, help='start page (default = 1)')
    parser.add_argument('-e', '--endPage', type=int, default=50, help='end page (default = 50)')
    args = parser.parse_args()
    # Pages are 1-based; zero or negative values would index from the end
    # of the URL list instead of failing.
    if args.startPage < 1:
        parser.error('start page must be 1 or greater')

    urlList, ver3 = getImgUrls(args.url)
    length = len(urlList)
    # Width of the zero-padded page number used in file names.
    tLen = len(str(length)) + 1

    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs('./img', exist_ok=True)

    print('downloading...')
    for i in range(args.startPage - 1, args.endPage):
        # '>=' also covers a start page beyond the end of the manifest.
        if i >= length:
            print(str(length) + ' is the last page')
            break
        localFilename = './img/' + str(i + 1).zfill(tLen) + '.png'
        imageUrl = urlList[i]
        if not ver3:
            # Non-v3 entries are image service ids; append the request for
            # the full-size default JPEG.
            imageUrl = imageUrl + '/full/full/0/default.jpg'
        response = requests.get(imageUrl, timeout=60)
        response.raise_for_status()
        # Normalise to 3-channel RGB so cvtColor always receives the layout
        # it is told to expect (Pillow delivers RGB, not OpenCV's BGR).
        imgObj = Image.open(BytesIO(response.content)).convert('RGB')
        grau_img = cv2.cvtColor(np.array(imgObj), cv2.COLOR_RGB2GRAY)
        if not cv2.imwrite(localFilename, grau_img):
            raise OSError('could not write ' + localFilename)
        print(localFilename)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment