-
-
Save Resisty/dfa3260c51f29eafb40f66ec6b7bb269 to your computer and use it in GitHub Desktop.
Inspect a list of images to see if any of them have really giant dimensions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
This script takes as input a list of image IDs from image table and
uses the Pillow python library to determine the height and width
of each image. Indicate if the image is larger than a configured pixel limit.
"""
from argparse import ArgumentParser
import os
import struct
import sys

try:
    # imghdr was removed from the standard library in Python 3.13 (PEP 594);
    # keep the import for older interpreters without breaking newer ones.
    import imghdr
except ImportError:
    imghdr = None
__version__ = '0.1'

# Default root directory of the binstore; overridable with --binstore-path.
BINSTORE_PATH = "/mnt/binstore/"
# Default image reader: 'pillow' (PIL) or 'native' (header parsing only).
LIBRARY = "pillow"
# Pixel-count threshold (width * height) at or above which an image is
# reported as too big.
OVERSIZED = 64000000
def parse_args():
    """Parse the command line and return the resulting namespace.

    The module-level constants are only used as defaults; they are never
    mutated, so callers must read the chosen values from the returned object.

    Returns:
        argparse.Namespace with the attributes ``infile`` (required
        positional), ``binstore_path`` (defaults to BINSTORE_PATH) and
        ``library`` ('native' or 'pillow', defaults to LIBRARY).
    """
    parser = ArgumentParser(
        # Exactly one INFILE is accepted, so the usage line must not
        # advertise it as optional or repeatable.
        usage='%(prog)s [options] INFILE',
        description=__doc__,
        prog=os.path.basename(sys.argv[0])
    )
    # INFILE is positional and therefore required: argparse itself reports
    # an error when it is missing, so no manual check is needed.
    parser.add_argument('infile', metavar='INFILE',
                        help='File containing the list of images')
    parser.add_argument('-b', '--binstore-path', default=BINSTORE_PATH,
                        help='Path to the directory containing the binstore')
    parser.add_argument('--library', choices=['native', 'pillow'],
                        default=LIBRARY,
                        help='Use PIL image processing if available, else '
                             'use native options to read the image headers')
    parser.add_argument('--version', action='version', version=__version__)
    return parser.parse_args()
# This probably doesn't need its own function | |
#def is_too_big(image): | |
# """Determine if this image is larger than the application can handle""" | |
# height, width = image.size | |
# return is_too_many_pixels(height, width) | |
# custom errors for when something weird happens | |
class BadImageError(Exception):
    """Raised when a file cannot be recognised or parsed as a supported image.

    No custom ``__init__`` is defined on purpose: the inherited constructor
    accepts the error message that every raise site passes in.
    """
# Let's use an object to handle our images | |
class Themshits(object):
    """Measure one binstore image and report whether it is oversized.

    An instance is created from an image ID. The on-disk location is derived
    from that ID under the binstore directory, and the dimensions are read
    lazily on the first call to ``too_big`` (or explicitly via ``run``).
    """

    # Leading bytes identifying each format the native reader understands.
    _PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'
    _GIF_SIGNATURES = (b'GIF87a', b'GIF89a')
    _JPEG_SOI = b'\xff\xd8'
    # Marker codes inside 0xC0-0xCF that are not start-of-frame segments
    # (DHT, JPG extension, DAC) and so do not carry the image dimensions.
    _JPEG_NON_SOF = (0xc4, 0xc8, 0xcc)

    def __init__(self, path, binstore=BINSTORE_PATH, lib=LIBRARY):
        """Remember the image ID and how/where to read it.

        Args:
            path: image ID as listed in the input file.
            binstore: root directory of the binstore.
            lib: 'pillow' to read via PIL when importable, anything else to
                parse the file header directly.
        """
        self._binstore = binstore
        self._lib = lib
        self._fname = path
        self._imagepath = None
        self._image = None
        self._x, self._y = None, None  # width, height once measured
        self._run = False

    @property
    def name(self):
        """The image ID this object was created with."""
        return self._fname

    def too_big(self):
        """Return True when width * height is at or above OVERSIZED.

        Measures the image first if that has not happened yet, so this may
        raise whatever ``run`` raises.
        """
        if not self._run:
            self.run()
        return self._x * self._y >= OVERSIZED

    def run(self):
        """Check a single image file and store its dimensions.

        Raises:
            BadImageError: the native reader could not recognise or parse
                the file.
            OSError: the derived binstore file could not be opened.
        """
        self.get_image_filename_from_imagename()
        pil_image = None
        if self._lib == 'pillow':
            try:
                from PIL import Image as pil_image
            except ImportError:
                # Pillow is optional; fall back to the header parser.
                pil_image = None
        if pil_image is not None:
            # Don't catch the exception unless you have a plan
            with pil_image.open(self._imagepath) as im:
                self._x, self._y = im.size  # Pillow reports (width, height)
        else:
            self._x, self._y = self._read_size_native()
        self._run = True

    def _read_size_native(self):
        """Return (width, height) parsed from the PNG/GIF/JPEG file header."""
        # http://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib
        with open(self._imagepath, 'rb') as fhandle:
            head = fhandle.read(24)
            if len(head) != 24:
                # Too short to hold any supported header; fail loudly rather
                # than leaving the dimensions unset.
                raise BadImageError('%s is too short to be an image' %
                                    self._fname)
            if head.startswith(self._PNG_SIGNATURE):
                # The first chunk must be IHDR; width and height follow it.
                if head[12:16] != b'IHDR':
                    raise BadImageError('%s non-recognizable as .png' %
                                        self._fname)
                width, height = struct.unpack('>II', head[16:24])
            elif head[:6] in self._GIF_SIGNATURES:
                width, height = struct.unpack('<HH', head[6:10])
            elif head.startswith(self._JPEG_SOI):
                width, height = self._read_jpeg_size(fhandle)
            else:
                raise BadImageError('%s non-recognizable image type' %
                                    self._fname)
        return width, height

    def _read_jpeg_size(self, fhandle):
        """Walk the JPEG segments up to the first SOFn and return (width, height)."""
        try:
            fhandle.seek(0)
            size = 2  # first hop skips the two-byte SOI marker
            ftype = 0
            while (not 0xc0 <= ftype <= 0xcf
                   or ftype in self._JPEG_NON_SOF):
                fhandle.seek(size, 1)
                byte = fhandle.read(1)
                while ord(byte) == 0xff:  # skip marker prefix / fill bytes
                    byte = fhandle.read(1)
                ftype = ord(byte)
                size = struct.unpack('>H', fhandle.read(2))[0] - 2
            # We are at a SOFn block
            fhandle.seek(1, 1)  # Skip 'precision' byte
            height, width = struct.unpack('>HH', fhandle.read(4))
        except (struct.error, TypeError):
            # A truncated file makes read() come up short: unpack raises
            # struct.error and ord(b'') raises TypeError.
            raise BadImageError('%s non-recognizable as .jpeg' % self._fname)
        return width, height

    def get_image_filename_from_imagename(self):
        """Derive the binstore file path from the image ID.

        The ID has every '-' replaced by '45' and is then reversed; the file
        lives at <binstore>/jsbs/<c1>/<c2>/<c3>/<reversed>.bin where c1..c3
        are the first three characters of the reversed name.
        """
        newname = self._fname.replace('-', '45')[::-1]
        first_three_chars = list(newname[:3])
        # os.path.join tolerates a binstore path without a trailing slash.
        parts = ['jsbs'] + first_three_chars + [newname + '.bin']
        self._imagepath = os.path.join(self._binstore, *parts)
def main():
    """Read image IDs from the input file and report each one's size status.

    Prints the binstore path and library in use, then one line per image ID
    saying whether the image is too big or ok. Errors raised while measuring
    an image are not caught here and abort the run.
    """
    args = parse_args()
    print(args.binstore_path)
    print(args.library)
    with open(args.infile) as fobj:
        for line in fobj:
            image_id = line.strip()
            if not image_id:
                # Blank lines (e.g. a trailing newline) are not image IDs.
                continue
            t = Themshits(image_id,
                          binstore=args.binstore_path,
                          lib=args.library)
            print('%s is too big!' % t.name
                  if t.too_big()
                  else '%s is ok!' % t.name)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Blargh, gist is weird. Found a few bugs, will fix later.