Created
December 17, 2016 22:47
-
-
Save tim-peters/905bd710be0d4b6606fa6aa6242aaffb to your computer and use it in GitHub Desktop.
Python Script to find corrupted image files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Depends on
# * Python 2
# * Pillow (https://pypi.python.org/pypi/Pillow/3.4.2#downloads)
#
# Execute with
# python2 <this script> [options] files_or_directories   (see --help)
# http://superuser.com/questions/276154/automating-the-scanning-of-graphics-files-for-corruption
#
# Source
# http://superuser.com/questions/276154/automating-the-scanning-of-graphics-files-for-corruption
# --------------------
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# vi:ts=4 sw=4 et
# Okay, this code is a bit ugly, with a few "anti-patterns" and "code smells".
# But it works and I don't want to refactor it *right now*.
# TODO:
# * Refactor it a little
# * Add support for custom filename filter (instead of the hardcoded one)
import fnmatch
import getopt
import os
import os.path
import re
import sys

import PIL.Image
# Command-line switches understood by the script, one tuple per switch:
#   (short option letter, long option name, help text)
# parse_options() derives its getopt specification from this table and
# print_help() renders it as the "Options:" section.
available_parameters = [
    ("h", "help", "Print help"),
    ("v", "verbose", "Also print clean files"),
]
class ProgramOptions(object):
    """Holds the program options, after they are parsed by parse_options().

    Attributes:
        globs: list of fnmatch-style patterns selecting the files to check.
        glob_re: compiled, case-insensitive regex matching any of ``globs``.
        verbose: False by default; set by the -v/--verbose switch.
        args: positional command-line arguments, empty until parsed.
    """

    # Patterns used when the caller does not supply its own.
    DEFAULT_GLOBS = ('*.jpg', '*.jpe', '*.jpeg', '*.png', '*.gif')

    def __init__(self, globs=None):
        """Initialise the options with their defaults.

        globs: optional iterable of fnmatch-style patterns; when omitted
            (None) DEFAULT_GLOBS is used.
        """
        if globs is None:
            globs = self.DEFAULT_GLOBS
        # Copy, so later changes to the caller's sequence do not leak in.
        self.globs = list(globs)
        # An empty regex would match every name; "(?!)" never matches, so
        # an empty pattern list selects no file at all.
        pattern = '|'.join(
            fnmatch.translate(g) for g in self.globs
        ) or '(?!)'
        self.glob_re = re.compile(pattern, re.IGNORECASE)
        self.verbose = False
        self.args = []
def print_help():
    """Print the usage line, the option table and the checked patterns.

    Reads the module-level ``available_parameters`` table and the global
    ``opt`` object (for ``opt.globs``), so ``opt`` must already exist when
    this is called.
    """
    scriptname = os.path.basename(sys.argv[0])
    print("Usage: {0} [options] files_or_directories".format(scriptname))
    # The pattern list covers PNG and GIF as well, hence "image" not "JPEG".
    print("Recursively checks for corrupt image files")
    print("")
    print("Options:")
    # Width of the long-option column: longest name plus its "--" prefix.
    long_width = 2 + max(len(entry[1]) for entry in available_parameters)
    for letter, name, desc in available_parameters:
        # A comma separates the two spellings only when both exist.
        separator = ", " if letter and name else " "
        short_opt = "-" + letter[0] if letter else " "
        long_opt = "--" + name if name else name
        print(" {0}{1}{2:{3}} {4}".format(
            short_opt, separator, long_opt, long_width, desc))
    print("")
    print("Currently (it is hardcoded), it only checks for these files:")
    print(" " + " ".join(opt.globs))
def parse_options(argv, opt):
    """Parse the command line and store the result in ``opt``.

    argv should be sys.argv[1:]
    opt should be an instance of ProgramOptions(); its ``verbose`` and
    ``args`` attributes are updated in place.

    Exits the process with status 2 on an invalid option or when no file
    or directory is given, and with status 0 after printing the help.
    """
    # Build the getopt specification from the shared option table.
    short_spec = "".join(entry[0] for entry in available_parameters)
    long_spec = [entry[1] for entry in available_parameters]
    try:
        opts, args = getopt.getopt(argv, short_spec, long_spec)
    except getopt.GetoptError as e:
        print(str(e))
        print("Use --help for usage instructions.")
        sys.exit(2)

    for flag, _value in opts:
        if flag in ("-h", "--help"):
            print_help()
            sys.exit(0)
        elif flag in ("-v", "--verbose"):
            opt.verbose = True
        else:
            print("Invalid parameter: {0}".format(flag))
            print("Use --help for usage instructions.")
            sys.exit(2)

    opt.args = args
    # At least one file or directory is required.
    if not args:
        print("Missing filename")
        print("Use --help for usage instructions.")
        sys.exit(2)
def is_corrupt(jpegfile):
    """Returns None if the file is okay, returns an error string if the file is corrupt.

    jpegfile: path of the image file to check (handed to PIL.Image.open).

    The returned error string is never empty, so callers may safely test
    it for truthiness.
    """
    # http://stackoverflow.com/questions/1401527/how-do-i-programmatically-check-whether-an-image-png-jpeg-or-gif-is-corrupted/1401565#1401565
    try:
        # The context manager closes the image (and its file) on exit
        # instead of leaving that to garbage collection.
        with PIL.Image.open(jpegfile) as im:
            im.verify()
    except Exception as e:
        # Deliberately broad: any failure to open or verify the file is
        # reported as that file's status rather than aborting the scan.
        # Some exceptions carry no message; fall back to the class name so
        # a failure is never mistaken for "okay".
        return str(e) or e.__class__.__name__
    return None
def check_files(files):
    """Receives an iterable of file paths and checks each one.

    Paths that do not match the global ``opt.glob_re`` filter are skipped.
    A corrupt file is printed as "path: error"; when ``opt.verbose`` is set
    a clean file is printed as "path: Ok" as well.
    """
    for path in files:
        # Only files matching one of the configured image patterns.
        if not opt.glob_re.match(path):
            continue
        status = is_corrupt(path)
        if status is None:
            # Clean file: only reported in verbose mode.
            if not opt.verbose:
                continue
            status = "Ok"
        # Compare against None above rather than testing truthiness, so a
        # corrupt file with an empty error string is still reported.
        print("{0}: {1}".format(path, status))
def main():
    """Parse the command line, then check every file or directory given.

    Directories are walked recursively with os.walk. The parsed options
    are stored in the module-level ``opt``, which print_help() and
    check_files() read.
    """
    global opt
    opt = ProgramOptions()
    parse_options(sys.argv[1:], opt)
    for pathname in opt.args:
        if os.path.isfile(pathname):
            check_files([pathname])
        elif os.path.isdir(pathname):
            for dirpath, _dirnames, filenames in os.walk(pathname):
                check_files(os.path.join(dirpath, f) for f in filenames)
        else:
            print("ERROR: '{0}' is neither a file or a dir.".format(pathname))
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment