Cairnarvon/nottineye.py

## nottineye.py
#!/usr/bin/python

import glob
import math
import os
import getopt
import sys

from PIL import Image


# File extensions, in lower- and upper-case spellings, that are looked for
# when a directory is scanned for images.  The order is significant: it is
# the order in which directory contents are globbed.
EXTS = (
    'jpg', 'jpeg', 'JPG', 'JPEG',
    'gif', 'GIF',
    'png', 'PNG',
    'bmp', 'BMP',
)

# Progress-bar modes: never draw, always draw, or decide at run time.
PROG_NO = 0
PROG_YES = 1
PROG_SMART = 2


def avhash(im):
    """Return the 64-bit average hash of an image.

    The image is shrunk to 8x8 and converted to greyscale.  Each of the 64
    pixels contributes one bit: 1 when the pixel is not darker than the mean
    of all 64 pixels, 0 otherwise.  Pixel number i (in getdata() order) maps
    to bit i of the result.

    im -- a PIL Image instance, or anything Image.open() accepts.

    Returns a non-negative integer.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    # Newer Pillow releases call this filter LANCZOS and no longer provide
    # the ANTIALIAS name; old PIL only has ANTIALIAS.  Use whichever exists.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    # Read the pixel data once; it is needed for the mean and for the bits.
    pixels = list(im.resize((8, 8), resample).convert('L').getdata())
    avg = sum(pixels) / 64.
    # Every index sets a distinct bit, so adding is the same as OR-ing.
    return sum(1 << i for i, p in enumerate(pixels) if p >= avg)

def phash(im):
    """Return a 64-bit perceptual hash built from low-frequency cosine terms.

    The image is shrunk to 32x32 and converted to greyscale.  For every
    frequency pair (u, v) with 0 <= u, v < 8 the unnormalised DCT-II style
    sum

        sum over x, y of pixel(x, y) * cos(pi/32 * (x + .5) * u)
                                     * cos(pi/32 * (y + .5) * v)

    is computed, v varying slowest.  The 64 sums are compared with their
    mean, the first (u = v = 0) term being left out of that mean; sum number
    i sets bit i of the result when it is not below the mean.

    im -- a PIL Image instance, or anything Image.open() accepts.

    Returns a non-negative integer.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    # Newer Pillow releases call this filter LANCZOS and no longer provide
    # the ANTIALIAS name; old PIL only has ANTIALIAS.  Use whichever exists.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    im = im.resize((32, 32), resample).convert('L')
    px = im.load()
    # The cosine factors depend only on (frequency, position), so build the
    # 8x32 table once instead of recomputing it for every pixel of every term.
    table = [[math.cos(math.pi / 32 * (pos + .5) * freq) for pos in range(32)]
             for freq in range(8)]
    seq = [sum(px[x, y] * cos_u[x] * cos_v[y]
               for x in range(32) for y in range(32))
           for cos_v in table for cos_u in table]
    # The first term is left out of the mean, as in the comparison above.
    avg = sum(seq[1:]) / (len(seq) - 1)
    # Every index sets a distinct bit, so adding is the same as OR-ing.
    return sum(1 << i for i, c in enumerate(seq) if c >= avg)

def hamming(h1, h2):
    """Count the bit positions in which the integers h1 and h2 differ."""
    diff = h1 ^ h2
    count = 0
    # Walk the XOR from the lowest bit upwards, tallying the set bits.
    while diff:
        count += diff & 1
        diff >>= 1
    return count


def _compare(f, fs, algo=avhash, exts=EXTS, prog=PROG_SMART, thresh=64):
    """Print every image in fs whose hash is within thresh of the target's.

    f      -- target image, handed to algo as-is.
    fs     -- sequence of candidate images, each handed to algo as-is.
    algo   -- hash function applied to the target and to every candidate.
    exts   -- unused; kept so existing callers keep working.
    prog   -- progress mode forwarded to _progress().
    thresh -- largest Hamming distance that is still reported.

    One "distance<TAB>candidate" line is written to stdout per match,
    smallest distance first; candidates with equal distance keep their order
    in fs.  A candidate that raises IOError while being hashed is reported on
    stderr and skipped, so one unreadable file does not abort the whole run.
    An error while hashing the target itself still propagates.
    """
    target = algo(f)
    total = len(fs)
    scored = []
    for done, candidate in enumerate(fs, 1):
        try:
            scored.append((hamming(algo(candidate), target), candidate))
        except IOError as e:
            sys.stderr.write("Skipping %s: %s\n" % (candidate, e))
        _progress(done, total, prog)
    # list.sort() is stable, so ties stay in input order.
    scored.sort(key=lambda pair: pair[0])
    for pair in scored:
        if pair[0] <= thresh:
            sys.stdout.write("%d\t%s\n" % pair)

def _expand_files(l):
    """Turn path arguments into a flat list of image paths.

    l -- list of directory names and/or glob patterns.  An empty list stands
         for the current directory.

    A directory contributes the files directly inside it whose extension is
    listed in EXTS (subdirectories are not descended into).  Anything else is
    handed to glob.glob() as a pattern.  Each path is returned once, in
    first-seen order.
    """
    if not l:
        l = ['.']
    images = []
    seen = set()
    for entry in l:
        if os.path.isdir(entry):
            patterns = [os.path.join(entry, '*.' + ext) for ext in EXTS]
        else:
            patterns = [entry]
        for pattern in patterns:
            for path in glob.glob(pattern):
                # Where glob matching ignores case (e.g. on Windows) the
                # 'jpg' and 'JPG' patterns hit the same files; keep only
                # the first occurrence of each path.
                if path not in seen:
                    seen.add(path)
                    images.append(path)
    return images

def _help():
    """Write the usage message to stderr.

    The text uses ANSI escapes for bold (ESC[1m), underline (ESC[4m) and
    reset (ESC[0m).  The program name is taken from sys.argv[0] and the
    default extension list from EXTS.  The message is followed by one blank
    line.
    """
    # Built from adjacent literals, one per output line, so the layout of the
    # message does not depend on how this function is indented.  The text is
    # pure ASCII so it can be written to a non-terminal stderr as well.
    text = (
        "\033[1mUSAGE\033[0m\n"
        "    \033[1m%s\033[0m [\033[4mOPTION\033[0m]... \033[4mIMAGE\033[0m [\033[4mIMAGE\033[0m|\033[4mDIR\033[0m]...\n"
        "\n"
        "\033[1mDESCRIPTION\033[0m\n"
        "    Examine images to see how similar they are to a target image.\n"
        "\n"
        "    The first image specified is the target image. Every subsequent image is\n"
        "    examined to see if it matches. You may also provide directories instead, in\n"
        "    which case we'll look for images there (though not recursively through\n"
        "    subdirectories).\n"
        "    Output is the list of images sorted by similarity, plus a number between\n"
        "    0 and 64 indicating similarity (0 being identical).\n"
        "    If no extra images are supplied, we'll just look in the current directory.\n"
        "\n"
        "    Mandatory arguments to long options are mandatory for short options too.\n"
        "\n"
        "    \033[1m-h\033[0m, \033[1m--help\033[0m\n"
        "        Print this message and exit.\n"
        "\n"
        "    \033[1m-a\033[0m, \033[1m--algo\033[0m[\033[1mrithm\033[0m]=\033[4mALGO\033[0m\n"
        "        Specify hashing algorithm. (\033[1maverage\033[0m or \033[1mphash\033[0m; default: \033[1maverage\033[0m)\n"
        "\n"
        "    \033[1m--average\033[0m, \033[1m--avhash\033[0m, \033[1m--avg\033[0m\n"
        "        Same as \033[1m--algo=average\033[0m.\n"
        "\n"
        "    \033[1m-e\033[0m, \033[1m--ext\033[0m[\033[1mension\033[0m]\033[1ms\033[0m=\033[4mEXTS\033[0m\n"
        "        Provide a comma-separated list of valid image extensions for directory\n"
        "        processing. (Ignored for images that are explicitly provided.)\n"
        "        Default: \033[1m%s\033[0m\n"
        "\n"
        "    \033[1m--phash\033[0m\n"
        "        Same as \033[1m--algo=phash\033[0m.\n"
        "\n"
        "    \033[1m-p\033[0m, \033[1m--progress\033[0m=\033[1my\033[0m[\033[1mes\033[0m]|\033[1mn\033[0m[\033[1mo\033[0m]|\033[1ms\033[0m[\033[1mmart\033[0m]\n"
        "        Display progress bar. (default: \033[1msmart\033[0m)\n"
        "\n"
        "    \033[1m-t\033[0m, \033[1m--threshold\033[0m=\033[4mN\033[0m\n"
        "        Display only images within Hamming distance \033[4mN\033[0m of target, where\n"
        "        0 <= N <= 64. (Lower is closer match; default: 64)\n"
        "\n"
        "    \033[1m--only-matches\033[0m\n"
        "        Same as \033[1m--threshold=10\033[0m.\n"
    ) % (sys.argv[0], ','.join(EXTS))
    sys.stderr.write(text + "\n")

def _parseopt(argv):
    """Parse the command line.

    argv -- argument list without the program name (e.g. sys.argv[1:]).

    Returns a 5-tuple (algo, prog, thresh, target, candidates):
      algo       -- hash function, avhash or phash (default avhash)
      prog       -- PROG_NO, PROG_YES or PROG_SMART (default PROG_SMART)
      thresh     -- largest Hamming distance to report (default 64)
      target     -- first positional argument, or None when there is none
      candidates -- remaining positional arguments run through
                    _expand_files()

    Side effects: -e/--exts/--extensions rebinds the module-level EXTS;
    -h/--help prints the usage text and exits with status 0; an option
    getopt rejects is reported on stderr and exits with status 2.  An invalid
    value for the algorithm, progress or threshold option is reported on
    stderr and the setting in effect so far is kept.
    """
    global EXTS
    algo, prog, thresh = avhash, PROG_SMART, 64

    try:
        # Parse the list we were given rather than reaching for sys.argv.
        optlist, args = getopt.getopt(argv, 'ha:p:t:e:',
                                      ['help',
                                       'algo=', 'algorithm=',
                                       'average', 'avhash', 'avg',
                                       'phash',
                                       'progress=',
                                       'threshold=', 'only-matches',
                                       'exts=', 'extensions='])
    except getopt.GetoptError as e:
        sys.stderr.write("%s\n" % e)
        sys.exit(2)

    for opt, arg in optlist:
        # A short option written as "-a=phash" arrives with the '=' still
        # attached to its value; strip it.
        arg = arg.replace('=', '')
        if opt in ('--help', '-h'):
            _help()
            sys.exit(0)
        elif opt in ('--algo', '--algorithm', '-a'):
            if arg in ('avhash', 'average', 'avg'):
                algo = avhash
            elif arg == 'phash':
                algo = phash
            else:
                sys.stderr.write("Invalid algorithm: %s\n" % arg)
        elif opt in ('--average', '--avhash', '--avg'):
            algo = avhash
        elif opt == '--phash':
            algo = phash
        elif opt in ('--progress', '-p'):
            choice = arg.lower()
            if choice in ('yes', 'y', ''):
                prog = PROG_YES
            elif choice in ('no', 'n'):
                prog = PROG_NO
            elif choice in ('smart', 's', 'auto'):
                prog = PROG_SMART
            else:
                sys.stderr.write("Invalid option: progress=%s\n" % arg)
        elif opt in ('--threshold', '-t'):
            try:
                thresh = int(arg)
            except ValueError:
                sys.stderr.write("Invalid option: threshold=%s\n" % arg)
        elif opt in ('--exts', '--extensions', '-e'):
            EXTS = arg.split(',')
        elif opt == '--only-matches':
            thresh = 10

    if not args:
        args = [None]
    return algo, prog, thresh, args[0], _expand_files(args[1:])

def _progress(x, y, prog=PROG_YES):
    """Redraw a one-line, 40-column progress bar on stdout.

    x    -- number of items finished so far.
    y    -- total number of items.
    prog -- PROG_YES always draws; PROG_SMART draws only when there are more
            than ten items and stdout is a terminal; any other value draws
            nothing.

    The line starts with a carriage return so successive calls overwrite one
    another; a newline is written once x equals y.  Nothing is drawn when y
    is not positive.
    """
    if y <= 0:
        # No total to measure against; also avoids dividing by zero.
        return
    if prog == PROG_YES or (prog == PROG_SMART and y > 10 and sys.stdout.isatty()):
        perc, w = 100. * x / y, int(40. * x / y)
        bar = '#' * w + ' ' * (40 - w)
        sys.stdout.write('\rCalculating... [%s] %.2f%% (%d/%d)' % (bar, perc, x, y))
        if x == y:
            sys.stdout.write('\n')
        sys.stdout.flush()

if __name__ == '__main__':
    # The first positional argument is the target image; everything after it
    # has already been expanded into the list of candidates.
    algo, prog, thresh, f, fs = _parseopt(sys.argv[1:])
    if f:
        _compare(f, fs, algo=algo, prog=prog, thresh=thresh)
    else:
        # No target image given: show the usage text and report failure.
        _help()
        sys.exit(1)
	#!/usr/bin/python

	import glob
	import math
	import os
	import getopt
	import sys

	from PIL import Image


	# File extensions, in lower- and upper-case spellings, that are looked for
	# when a directory is scanned for images.  The order is significant: it is
	# the order in which directory contents are globbed.
	EXTS = (
	    'jpg', 'jpeg', 'JPG', 'JPEG',
	    'gif', 'GIF',
	    'png', 'PNG',
	    'bmp', 'BMP',
	)

	# Progress-bar modes: never draw, always draw, or decide at run time.
	PROG_NO = 0
	PROG_YES = 1
	PROG_SMART = 2


	def avhash(im):
	    """Return the 64-bit average hash of an image.

	    The image is shrunk to 8x8 and converted to greyscale.  Each of the 64
	    pixels contributes one bit: 1 when the pixel is not darker than the mean
	    of all 64 pixels, 0 otherwise.  Pixel number i (in getdata() order) maps
	    to bit i of the result.

	    im -- a PIL Image instance, or anything Image.open() accepts.

	    Returns a non-negative integer.
	    """
	    if not isinstance(im, Image.Image):
	        im = Image.open(im)
	    # Newer Pillow releases call this filter LANCZOS and no longer provide
	    # the ANTIALIAS name; old PIL only has ANTIALIAS.  Use whichever exists.
	    resample = getattr(Image, 'LANCZOS', None)
	    if resample is None:
	        resample = Image.ANTIALIAS
	    # Read the pixel data once; it is needed for the mean and for the bits.
	    pixels = list(im.resize((8, 8), resample).convert('L').getdata())
	    avg = sum(pixels) / 64.
	    # Every index sets a distinct bit, so adding is the same as OR-ing.
	    return sum(1 << i for i, p in enumerate(pixels) if p >= avg)

	def phash(im):
	    """Return a 64-bit perceptual hash built from low-frequency cosine terms.

	    The image is shrunk to 32x32 and converted to greyscale.  For every
	    frequency pair (u, v) with 0 <= u, v < 8 the unnormalised DCT-II style
	    sum

	        sum over x, y of pixel(x, y) * cos(pi/32 * (x + .5) * u)
	                                     * cos(pi/32 * (y + .5) * v)

	    is computed, v varying slowest.  The 64 sums are compared with their
	    mean, the first (u = v = 0) term being left out of that mean; sum number
	    i sets bit i of the result when it is not below the mean.

	    im -- a PIL Image instance, or anything Image.open() accepts.

	    Returns a non-negative integer.
	    """
	    if not isinstance(im, Image.Image):
	        im = Image.open(im)
	    # Newer Pillow releases call this filter LANCZOS and no longer provide
	    # the ANTIALIAS name; old PIL only has ANTIALIAS.  Use whichever exists.
	    resample = getattr(Image, 'LANCZOS', None)
	    if resample is None:
	        resample = Image.ANTIALIAS
	    im = im.resize((32, 32), resample).convert('L')
	    px = im.load()
	    # The cosine factors depend only on (frequency, position), so build the
	    # 8x32 table once instead of recomputing it for every pixel of every term.
	    table = [[math.cos(math.pi / 32 * (pos + .5) * freq) for pos in range(32)]
	             for freq in range(8)]
	    seq = [sum(px[x, y] * cos_u[x] * cos_v[y]
	               for x in range(32) for y in range(32))
	           for cos_v in table for cos_u in table]
	    # The first term is left out of the mean, as in the comparison above.
	    avg = sum(seq[1:]) / (len(seq) - 1)
	    # Every index sets a distinct bit, so adding is the same as OR-ing.
	    return sum(1 << i for i, c in enumerate(seq) if c >= avg)

	def hamming(h1, h2):
	    """Count the bit positions in which the integers h1 and h2 differ."""
	    h, d = 0, h1 ^ h2
	    # Each pass clears the lowest set bit of the XOR, so the loop runs once
	    # per differing bit.
	    while d:
	        h += 1
	        d &= d - 1
	    return h


	def _compare(f, fs, algo=avhash, exts=EXTS, prog=PROG_SMART, thresh=64):
	    """Print every image in fs whose hash is within thresh of the target's.

	    f      -- target image, handed to algo as-is.
	    fs     -- sequence of candidate images, each handed to algo as-is.
	    algo   -- hash function applied to the target and to every candidate.
	    exts   -- unused; kept so existing callers keep working.
	    prog   -- progress mode forwarded to _progress().
	    thresh -- largest Hamming distance that is still reported.

	    One "distance<TAB>candidate" line is written to stdout per match,
	    smallest distance first; candidates with equal distance keep their order
	    in fs.  A candidate that raises IOError while being hashed is reported on
	    stderr and skipped, so one unreadable file does not abort the whole run.
	    An error while hashing the target itself still propagates.
	    """
	    target = algo(f)
	    total = len(fs)
	    scored = []
	    for done, candidate in enumerate(fs, 1):
	        try:
	            scored.append((hamming(algo(candidate), target), candidate))
	        except IOError as e:
	            sys.stderr.write("Skipping %s: %s\n" % (candidate, e))
	        _progress(done, total, prog)
	    # list.sort() is stable, so ties stay in input order.
	    scored.sort(key=lambda pair: pair[0])
	    for pair in scored:
	        if pair[0] <= thresh:
	            sys.stdout.write("%d\t%s\n" % pair)

	def _expand_files(l):
	    """Turn path arguments into a flat list of image paths.

	    l -- list of directory names and/or glob patterns.  An empty list stands
	         for the current directory.

	    A directory contributes the files directly inside it whose extension is
	    listed in EXTS (subdirectories are not descended into).  Anything else is
	    handed to glob.glob() as a pattern.  Each path is returned once, in
	    first-seen order.
	    """
	    if not l:
	        l = ['.']
	    images = []
	    seen = set()
	    for entry in l:
	        if os.path.isdir(entry):
	            patterns = [os.path.join(entry, '*.' + ext) for ext in EXTS]
	        else:
	            patterns = [entry]
	        for pattern in patterns:
	            for path in glob.glob(pattern):
	                # Where glob matching ignores case (e.g. on Windows) the
	                # 'jpg' and 'JPG' patterns hit the same files; keep only
	                # the first occurrence of each path.
	                if path not in seen:
	                    seen.add(path)
	                    images.append(path)
	    return images

	def _help():
	    """Write the usage message to stderr.

	    The text uses ANSI escapes for bold (ESC[1m), underline (ESC[4m) and
	    reset (ESC[0m).  The program name is taken from sys.argv[0] and the
	    default extension list from EXTS.  The message is followed by one blank
	    line.
	    """
	    # Built from adjacent literals, one per output line, so the layout of the
	    # message does not depend on how this function is indented.  The text is
	    # pure ASCII so it can be written to a non-terminal stderr as well.
	    text = (
	        "\033[1mUSAGE\033[0m\n"
	        "    \033[1m%s\033[0m [\033[4mOPTION\033[0m]... \033[4mIMAGE\033[0m [\033[4mIMAGE\033[0m|\033[4mDIR\033[0m]...\n"
	        "\n"
	        "\033[1mDESCRIPTION\033[0m\n"
	        "    Examine images to see how similar they are to a target image.\n"
	        "\n"
	        "    The first image specified is the target image. Every subsequent image is\n"
	        "    examined to see if it matches. You may also provide directories instead, in\n"
	        "    which case we'll look for images there (though not recursively through\n"
	        "    subdirectories).\n"
	        "    Output is the list of images sorted by similarity, plus a number between\n"
	        "    0 and 64 indicating similarity (0 being identical).\n"
	        "    If no extra images are supplied, we'll just look in the current directory.\n"
	        "\n"
	        "    Mandatory arguments to long options are mandatory for short options too.\n"
	        "\n"
	        "    \033[1m-h\033[0m, \033[1m--help\033[0m\n"
	        "        Print this message and exit.\n"
	        "\n"
	        "    \033[1m-a\033[0m, \033[1m--algo\033[0m[\033[1mrithm\033[0m]=\033[4mALGO\033[0m\n"
	        "        Specify hashing algorithm. (\033[1maverage\033[0m or \033[1mphash\033[0m; default: \033[1maverage\033[0m)\n"
	        "\n"
	        "    \033[1m--average\033[0m, \033[1m--avhash\033[0m, \033[1m--avg\033[0m\n"
	        "        Same as \033[1m--algo=average\033[0m.\n"
	        "\n"
	        "    \033[1m-e\033[0m, \033[1m--ext\033[0m[\033[1mension\033[0m]\033[1ms\033[0m=\033[4mEXTS\033[0m\n"
	        "        Provide a comma-separated list of valid image extensions for directory\n"
	        "        processing. (Ignored for images that are explicitly provided.)\n"
	        "        Default: \033[1m%s\033[0m\n"
	        "\n"
	        "    \033[1m--phash\033[0m\n"
	        "        Same as \033[1m--algo=phash\033[0m.\n"
	        "\n"
	        "    \033[1m-p\033[0m, \033[1m--progress\033[0m=\033[1my\033[0m[\033[1mes\033[0m]|\033[1mn\033[0m[\033[1mo\033[0m]|\033[1ms\033[0m[\033[1mmart\033[0m]\n"
	        "        Display progress bar. (default: \033[1msmart\033[0m)\n"
	        "\n"
	        "    \033[1m-t\033[0m, \033[1m--threshold\033[0m=\033[4mN\033[0m\n"
	        "        Display only images within Hamming distance \033[4mN\033[0m of target, where\n"
	        "        0 <= N <= 64. (Lower is closer match; default: 64)\n"
	        "\n"
	        "    \033[1m--only-matches\033[0m\n"
	        "        Same as \033[1m--threshold=10\033[0m.\n"
	    ) % (sys.argv[0], ','.join(EXTS))
	    sys.stderr.write(text + "\n")

	def _parseopt(argv):
	    """Parse the command line.

	    argv -- argument list without the program name (e.g. sys.argv[1:]).

	    Returns a 5-tuple (algo, prog, thresh, target, candidates):
	      algo       -- hash function, avhash or phash (default avhash)
	      prog       -- PROG_NO, PROG_YES or PROG_SMART (default PROG_SMART)
	      thresh     -- largest Hamming distance to report (default 64)
	      target     -- first positional argument, or None when there is none
	      candidates -- remaining positional arguments run through
	                    _expand_files()

	    Side effects: -e/--exts/--extensions rebinds the module-level EXTS;
	    -h/--help prints the usage text and exits with status 0; an option
	    getopt rejects is reported on stderr and exits with status 2.  An invalid
	    value for the algorithm, progress or threshold option is reported on
	    stderr and the setting in effect so far is kept.
	    """
	    global EXTS
	    algo, prog, thresh = avhash, PROG_SMART, 64

	    try:
	        # Parse the list we were given rather than reaching for sys.argv.
	        optlist, args = getopt.getopt(argv, 'ha:p:t:e:',
	                                      ['help',
	                                       'algo=', 'algorithm=',
	                                       'average', 'avhash', 'avg',
	                                       'phash',
	                                       'progress=',
	                                       'threshold=', 'only-matches',
	                                       'exts=', 'extensions='])
	    except getopt.GetoptError as e:
	        sys.stderr.write("%s\n" % e)
	        sys.exit(2)

	    for opt, arg in optlist:
	        # A short option written as "-a=phash" arrives with the '=' still
	        # attached to its value; strip it.
	        arg = arg.replace('=', '')
	        if opt in ('--help', '-h'):
	            _help()
	            sys.exit(0)
	        elif opt in ('--algo', '--algorithm', '-a'):
	            if arg in ('avhash', 'average', 'avg'):
	                algo = avhash
	            elif arg == 'phash':
	                algo = phash
	            else:
	                sys.stderr.write("Invalid algorithm: %s\n" % arg)
	        elif opt in ('--average', '--avhash', '--avg'):
	            algo = avhash
	        elif opt == '--phash':
	            algo = phash
	        elif opt in ('--progress', '-p'):
	            choice = arg.lower()
	            if choice in ('yes', 'y', ''):
	                prog = PROG_YES
	            elif choice in ('no', 'n'):
	                prog = PROG_NO
	            elif choice in ('smart', 's', 'auto'):
	                prog = PROG_SMART
	            else:
	                sys.stderr.write("Invalid option: progress=%s\n" % arg)
	        elif opt in ('--threshold', '-t'):
	            try:
	                thresh = int(arg)
	            except ValueError:
	                sys.stderr.write("Invalid option: threshold=%s\n" % arg)
	        elif opt in ('--exts', '--extensions', '-e'):
	            EXTS = arg.split(',')
	        elif opt == '--only-matches':
	            thresh = 10

	    if not args:
	        args = [None]
	    return algo, prog, thresh, args[0], _expand_files(args[1:])

	def _progress(x, y, prog=PROG_YES):
	    """Redraw a one-line, 40-column progress bar on stdout.

	    x    -- number of items finished so far.
	    y    -- total number of items.
	    prog -- PROG_YES always draws; PROG_SMART draws only when there are more
	            than ten items and stdout is a terminal; any other value draws
	            nothing.

	    The line starts with a carriage return so successive calls overwrite one
	    another; a newline is written once x equals y.  Nothing is drawn when y
	    is not positive.
	    """
	    if y <= 0:
	        # No total to measure against; also avoids dividing by zero.
	        return
	    if prog == PROG_YES or (prog == PROG_SMART and y > 10 and sys.stdout.isatty()):
	        perc, w = 100. * x / y, int(40. * x / y)
	        bar = '#' * w + ' ' * (40 - w)
	        sys.stdout.write('\rCalculating... [%s] %.2f%% (%d/%d)' % (bar, perc, x, y))
	        if x == y:
	            sys.stdout.write('\n')
	        sys.stdout.flush()

	if __name__ == '__main__':
	    # The first positional argument is the target image; everything after it
	    # has already been expanded into the list of candidates.
	    algo, prog, thresh, f, fs = _parseopt(sys.argv[1:])
	    if not f:
	        # No target image given: show the usage text and report failure.
	        _help()
	        sys.exit(1)
	    else:
	        _compare(f, fs, algo=algo, prog=prog, thresh=thresh)