Skip to content

Instantly share code, notes, and snippets.

@vigevenoj
Created July 14, 2016 05:07
Show Gist options
  • Save vigevenoj/441a428bd1fd864a9857153b95e31286 to your computer and use it in GitHub Desktop.
Save vigevenoj/441a428bd1fd864a9857153b95e31286 to your computer and use it in GitHub Desktop.
Inspect a list of images to see if any of them have really giant dimensions
#!/usr/bin/env python3
"""
This script takes as input a list of image IDs from image table and
uses the Pillow python library to determine the height and width
of each image. Indicate if the image is larger than a configured pixel limit.
"""
from argparse import ArgumentParser
import os
import sys
import struct
import imghdr
# Script version string; reported by the --version command-line option.
__version__ = '0.1'
# Root directory of the binstore. Empty until parse_args() overwrites it with
# the value of --binstore-path.
binstore_path = ""
# Image-reading backend to use. Empty until parse_args() overwrites it with
# the value of --library ('native' or 'pillow').
library = ""
def parse_args():
    """Parse the command-line arguments and publish the shared settings.

    Besides returning the parsed namespace, this stores the values of
    ``--binstore-path`` and ``--library`` in the module-level
    ``binstore_path`` and ``library`` globals, which other functions in
    this file read.

    Returns:
        The ``argparse.Namespace`` holding ``infile``, ``binstore_path``
        and ``library``.

    Raises:
        SystemExit: raised by argparse (status 2) when INFILE is missing
            or is an empty string.
    """
    global binstore_path
    global library

    parser = ArgumentParser(
        # Exactly one INFILE is accepted and it is required, so the usage
        # line must not advertise it as optional or repeatable.
        usage='%(prog)s [options] INFILE',
        description=__doc__,
        prog=os.path.basename(sys.argv[0])
    )
    parser.add_argument('infile', metavar='INFILE',
                        help='File containing the list of images')
    parser.add_argument('-b', '--binstore-path', default="/mnt/binstore/",
                        help='Path to the directory containing the binstore')
    parser.add_argument('--library', choices=['native', 'pillow'],
                        default='pillow',
                        help='Use PIL image processing if available, else '
                             'use native options to read the image headers')
    parser.add_argument('--version', action='version', version=__version__)
    args = parser.parse_args()

    # argparse itself rejects a missing INFILE; this additionally rejects an
    # explicitly empty one (""). parser.error() prints the message and raises
    # SystemExit, so nothing placed after it would ever run.
    if not args.infile:
        parser.error("An input file must be specified")

    binstore_path = args.binstore_path
    library = args.library
    return args
def is_too_big(image):
    """Tell whether *image* has more pixels than the application can handle.

    *image* must expose a two-element ``size`` attribute; both entries are
    forwarded to ``is_too_many_pixels``, which makes the actual decision.
    """
    # NOTE(review): Pillow documents ``size`` as (width, height) - confirm.
    # The order is irrelevant here because only the product is checked.
    first_dim, second_dim = image.size
    verdict = is_too_many_pixels(first_dim, second_dim)
    return verdict
def is_too_many_pixels(height, width, max_pixels=64000000):
    """Determine whether an image's pixel count reaches the pixel limit.

    Args:
        height: image height in pixels.
        width: image width in pixels.
        max_pixels: pixel-count limit; defaults to 64,000,000, the value
            that was previously hard-coded.

    Returns:
        True when ``height * width`` is greater than or equal to
        ``max_pixels`` (an image exactly at the limit counts as too big),
        False otherwise.
    """
    return (height * width) >= max_pixels
# Markers inside the 0xC0-0xCF range that are NOT start-of-frame segments:
# DHT (0xC4), JPG extension (0xC8) and DAC (0xCC). Treating them as frame
# headers would read table data as if it were the image dimensions.
_JPEG_NON_SOF_MARKERS = (0xc4, 0xc8, 0xcc)


def _read_jpeg_size(fhandle):
    """Scan the JPEG segments of *fhandle*; return (width, height) or None."""
    fhandle.seek(2)  # skip the SOI marker (0xFFD8) at the start of the file
    while True:
        byte = fhandle.read(1)
        # Advance to the next 0xFF marker prefix, then past any fill bytes.
        while byte and byte != b'\xff':
            byte = fhandle.read(1)
        while byte == b'\xff':
            byte = fhandle.read(1)
        if not byte:
            return None  # ran out of data before any frame header
        marker = ord(byte)
        if 0xc0 <= marker <= 0xcf and marker not in _JPEG_NON_SOF_MARKERS:
            # SOFn layout: length (2), precision (1), height (2), width (2).
            frame = fhandle.read(7)
            if len(frame) != 7:
                return None
            height, width = struct.unpack('>HH', frame[3:7])
            return width, height
        if marker in (0xd9, 0xda):
            # End of image / start of scan: no frame header will follow.
            return None
        if marker in (0x00, 0x01) or 0xd0 <= marker <= 0xd7:
            continue  # stand-alone markers carry no length field
        raw_length = fhandle.read(2)
        if len(raw_length) != 2:
            return None
        seg_length = struct.unpack('>H', raw_length)[0]
        if seg_length < 2:
            return None  # corrupt segment; the length includes its own 2 bytes
        fhandle.seek(seg_length - 2, 1)


def get_image_size(fname):
    """Read the pixel dimensions of a PNG, GIF or JPEG file from its header.

    The image type is recognised from the magic bytes at the start of the
    file, so the file is opened only once and no image library is needed.
    Based on
    http://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib

    Args:
        fname: path of the image file.

    Returns:
        A ``(width, height)`` tuple, or None when the file is shorter than
        24 bytes, is not a recognised PNG/GIF/JPEG, or is a JPEG in which
        no frame header can be found.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return None
        if head.startswith(b'\x89PNG\r\n\x1a\n'):
            # Width and height are read from bytes 16-23 of the header.
            width, height = struct.unpack('>ii', head[16:24])
            return width, height
        if head[:6] in (b'GIF87a', b'GIF89a'):
            # Little-endian 16-bit width and height follow the signature.
            width, height = struct.unpack('<HH', head[6:10])
            return width, height
        if head.startswith(b'\xff\xd8'):
            return _read_jpeg_size(fhandle)
    return None
def handle_name(imagename):
    """Check a single image file and report it when it is too big.

    Pillow is used when it is importable and the module-level ``library``
    setting is 'pillow'; otherwise the dimensions are read with
    ``get_image_size``. Results and errors are printed; nothing is returned.

    Args:
        imagename: path of the image file to inspect.
    """
    try:
        from PIL import Image
    except ImportError:
        # Pillow is optional. Bind the name so the check below falls through
        # to the native header parser instead of raising a NameError.
        Image = None

    if Image is not None and library == 'pillow':
        try:
            with Image.open(imagename) as im:
                print(imagename, im.format, "%dx%d" % im.size)
                if is_too_big(im):
                    print(imagename, " is too big at ", im.size)
        except IOError:
            print("error opening ", imagename)
        return

    try:
        size = get_image_size(imagename)
    except IOError:
        print("error opening ", imagename, " with native library")
        return
    if size is None:
        # get_image_size() returns None for files it cannot parse.
        print("could not determine size of ", imagename,
              " with native library")
        return
    width, height = size
    # is_too_big() expects an image object; with bare dimensions the pixel
    # check has to be called directly.
    if is_too_many_pixels(height, width):
        print(imagename, " is too big at ", width, height, " pixels")
def get_image_filename_from_imagename(imagename, binstore=None):
    """Map an image name to the path of its file inside the binstore.

    Every '-' in the name is replaced by '45' and the result is reversed.
    The first three characters of that string become nested directory
    names below ``jsbs``, and the file itself is the whole string plus a
    ``.bin`` suffix.

    Args:
        imagename: image name as listed in the input file.
        binstore: binstore root directory; defaults to the module-level
            ``binstore_path`` set by ``parse_args``.

    Returns:
        The computed binstore path. Previously the unmodified *imagename*
        was returned and the computed path was only printed, so the
        binstore location never took effect.
    """
    if binstore is None:
        binstore = binstore_path
    newname = imagename.replace('-', '45')[::-1]
    # os.path.join copes with a binstore path given without a trailing slash.
    path_parts = list(newname[:3]) + [newname + '.bin']
    binstorepath = os.path.join(binstore, 'jsbs', *path_parts)
    print("binstore path for", imagename, " is ", binstorepath)
    return binstorepath
def main():
    """Run the script: check every image named in the input file.

    Parses the command line, prints the chosen binstore path and library,
    then reads the input file one line at a time. Each non-blank line is
    treated as an image name, mapped to a file name and handed to
    ``handle_name``.
    """
    args = parse_args()
    print(args.binstore_path)
    print(args.library)
    with open(args.infile) as fobj:
        for line in fobj:
            # Strip the full line ending (including '\r' from CRLF files)
            # and skip blank lines instead of checking an empty name.
            imagename = line.strip()
            if not imagename:
                continue
            handle_name(get_image_filename_from_imagename(imagename))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment