Skip to content

Instantly share code, notes, and snippets.

@Resisty
Forked from vigevenoj/how_big_them_shits_is.py
Last active July 14, 2016 14:01
Show Gist options
  • Save Resisty/dfa3260c51f29eafb40f66ec6b7bb269 to your computer and use it in GitHub Desktop.
Save Resisty/dfa3260c51f29eafb40f66ec6b7bb269 to your computer and use it in GitHub Desktop.
Inspect a list of images to see if any of them have really giant dimensions
#!/usr/bin/env python3
"""
This script takes as input a list of image IDs from image table and
uses the Pillow python library to determine the height and width
of each image. Indicate if the image is larger than a configured pixel limit.
"""
from argparse import ArgumentParser
import os
import sys
import struct
import imghdr
__version__ = '0.1'
# Default root directory of the binstore; overridable with --binstore-path.
BINSTORE_PATH = "/mnt/binstore/"
# Default way of reading image dimensions ('pillow' or 'native'); see --library.
LIBRARY = "pillow"
# Pixel-count threshold (width * height): images at or above it are "too big".
OVERSIZED = 64000000
def parse_args(argv=None):
    """Parse the command-line options of the image size checker.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            script does when run from the command line.

    Returns:
        The ``argparse.Namespace`` with the attributes ``infile``,
        ``binstore_path`` and ``library``.
    """
    parser = ArgumentParser(
        # INFILE is one required positional argument, so the usage line must
        # not advertise it as optional or repeatable.
        usage='%(prog)s [options] INFILE',
        description=__doc__,
        prog=os.path.basename(sys.argv[0])
    )
    # Positional, hence mandatory: argparse itself errors out when missing.
    parser.add_argument('infile', metavar='INFILE',
                        help='File containing the list of images')
    parser.add_argument('-b', '--binstore-path', default=BINSTORE_PATH,
                        help='Path to the directory containing the binstore')
    parser.add_argument('--library', choices=['native', 'pillow'],
                        default=LIBRARY,
                        help='Use PIL image processing if available, else '
                             'use native options to read the image headers')
    parser.add_argument('--version', action='version', version=__version__)
    # The parsed values are returned to the caller rather than stored in
    # module globals, so the module-level defaults stay untouched.
    return parser.parse_args(argv)
# This probably doesn't need its own function
#def is_too_big(image):
# """Determine if this image is larger than the application can handle"""
# height, width = image.size
# return is_too_many_pixels(height, width)
# custom errors for when something weird happens
class BadImageError(Exception):
    """Raised when an image file cannot be recognised or measured.

    The constructor is deliberately inherited from ``Exception`` so that the
    error can carry a message: every raise site passes one, and an
    ``__init__`` that accepts no arguments would turn each of those raises
    into a ``TypeError``.
    """
# Let's use an object to handle our images
class Themshits(object):
    """Measure one binstore image and tell whether it is oversized.

    The image is identified by its name; the on-disk location is derived
    from that name and the binstore root. Dimensions are read lazily, the
    first time ``too_big()`` (or ``run()``) is called.

    Args:
        path: Name of the image, as listed in the input file.
        binstore: Root directory of the binstore.
        lib: ``'pillow'`` to read dimensions with Pillow when it can be
            imported, anything else to parse the file header directly.
    """

    # Markers inside 0xC0-0xCF that are not start-of-frame segments
    # (DHT, JPG extension, DAC) and therefore carry no dimensions.
    _JPEG_NON_SOF = (0xc4, 0xc8, 0xcc)

    def __init__(self, path, binstore=BINSTORE_PATH, lib=LIBRARY):
        self._binstore = binstore
        self._lib = lib
        self._fname = path
        self._imagepath = None
        self._image = None
        self._x, self._y = None, None
        self._run = False

    @property
    def name(self):
        """The image name this object was created with."""
        return self._fname

    def too_big(self):
        """Return True when width * height reaches the OVERSIZED limit.

        Reads the image dimensions first if that has not happened yet, so
        it can raise whatever ``run()`` raises.
        """
        if not self._run:
            self.run()
        return self._x * self._y >= OVERSIZED

    def run(self):
        """Locate the image file and record its width and height.

        Raises:
            BadImageError: The header is truncated or of an unknown type
                (native parsing only).
            OSError: The file cannot be opened.
        """
        self.get_image_filename_from_imagename()
        pillow = self._load_pillow() if self._lib == 'pillow' else None
        if pillow is not None:
            # NOTE(review): Pillow refuses to open extremely large images
            # with its decompression-bomb check - confirm that this is
            # acceptable for a tool whose job is to find giant images.
            with pillow.open(self._imagepath) as im:
                width, height = im.size
        else:
            width, height = self._native_dimensions()
        self._x, self._y = width, height
        self._run = True

    @staticmethod
    def _load_pillow():
        """Return the ``PIL.Image`` module, or None when Pillow is absent."""
        try:
            from PIL import Image
        except ImportError:
            return None
        return Image

    def _native_dimensions(self):
        """Return (width, height) parsed from the PNG/GIF/JPEG header."""
        # Header layouts follow
        # http://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib
        with open(self._imagepath, 'rb') as fhandle:
            head = fhandle.read(24)
            if len(head) != 24:
                raise BadImageError('%s is too short to hold an image header'
                                    % self._fname)
            # The type is sniffed from the bytes already read instead of
            # re-opening the file through the (removed in 3.13) imghdr.
            if head.startswith(b'\x89PNG\r\n\x1a\n'):
                # IHDR stores width then height as big-endian 32-bit values.
                return struct.unpack('>II', head[16:24])
            if head[:6] in (b'GIF87a', b'GIF89a'):
                return struct.unpack('<HH', head[6:10])
            if head.startswith(b'\xff\xd8'):
                return self._jpeg_dimensions(fhandle)
        raise BadImageError('%s non-recognizable image type' % self._fname)

    def _jpeg_dimensions(self, fhandle):
        """Walk the JPEG segments up to the frame header; return (w, h)."""
        fhandle.seek(0)
        skip = 2  # the first hop jumps over the two-byte SOI marker
        marker = 0
        try:
            while (not 0xc0 <= marker <= 0xcf
                   or marker in self._JPEG_NON_SOF):
                fhandle.seek(skip, 1)
                byte = fhandle.read(1)
                # Any number of 0xFF bytes may precede the marker code.
                while byte == b'\xff':
                    byte = fhandle.read(1)
                if not byte:
                    raise BadImageError('%s has no JPEG frame header'
                                        % self._fname)
                marker = byte[0]
                # The segment length counts its own two bytes.
                skip = struct.unpack('>H', fhandle.read(2))[0] - 2
                if skip < 0:
                    raise BadImageError('%s has a corrupt JPEG segment'
                                        % self._fname)
            # Positioned inside a SOFn segment, just after its length.
            fhandle.seek(1, 1)  # skip the 'precision' byte
            height, width = struct.unpack('>HH', fhandle.read(4))
        except struct.error as err:
            raise BadImageError('%s has a truncated JPEG header'
                                % self._fname) from err
        return width, height

    def get_image_filename_from_imagename(self):
        """Compute the binstore path of the image and store it.

        Every '-' in the name is replaced by '45', the result is reversed,
        and the file is placed under ``jsbs/`` in one directory level per
        each of the first three characters, with a ``.bin`` suffix.
        """
        newname = self._fname.replace('-', '45')[::-1]
        first_three_chars = list(newname[:3])
        relative = '/'.join(first_three_chars + [newname]) + '.bin'
        # os.path.join copes with a binstore root given without a trailing
        # separator, which plain string concatenation did not.
        self._imagepath = os.path.join(self._binstore, 'jsbs', relative)
def main():
    """Check every image named in the input file and report its verdict.

    Reads the command-line arguments, echoes the binstore path and library
    in use, then prints one "too big" / "ok" line per image name found in
    the input file. Blank lines are skipped, and an image that cannot be
    read is reported on stderr without stopping the remaining checks.
    """
    args = parse_args()
    print(args.binstore_path)
    print(args.library)
    with open(args.infile) as fobj:
        for line in fobj:
            image_name = line.strip()
            if not image_name:
                # A blank line (typically the trailing one) names no image.
                continue
            t = Themshits(image_name,
                          binstore=args.binstore_path,
                          lib=args.library)
            try:
                oversized = t.too_big()
            except (BadImageError, OSError) as err:
                # One missing or corrupt file must not abort the whole batch.
                print('%s could not be checked: %s' % (t.name, err),
                      file=sys.stderr)
                continue
            print('%s is too big!' % t.name
                  if oversized
                  else '%s is ok!' % t.name)
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@Resisty
Copy link
Author

Resisty commented Jul 14, 2016

Blargh, gist is weird. Found a few bugs, will fix later.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment