Skip to content

Instantly share code, notes, and snippets.

@Cairnarvon
Created June 4, 2011 14:25
Show Gist options
  • Save Cairnarvon/1007938 to your computer and use it in GitHub Desktop.
Save Cairnarvon/1007938 to your computer and use it in GitHub Desktop.
Tineye-style image search, locally. Everyone's a winner. (Requires PIL. --help for usage.)
#!/usr/bin/python
import glob
import math
import os
import getopt
import sys
from PIL import Image
# Extensions looked for when a directory is scanned; also shown as the
# default in the help text and replaced wholesale by the -e/--exts option.
EXTS = (
    'jpg', 'jpeg', 'JPG', 'JPEG',
    'gif', 'GIF',
    'png', 'PNG',
    'bmp', 'BMP',
)

# Progress-bar modes: never, always, or only for more than ten files when
# standard output is a terminal.
PROG_NO, PROG_YES, PROG_SMART = 0, 1, 2
def avhash(im):
    """Return the 64-bit average hash of an image.

    `im` is either a PIL image or something `Image.open` accepts. The image
    is shrunk to 8x8 and converted to greyscale; bit i of the result is set
    unless the i-th pixel is below the mean of the 64 pixel values.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    pixels = list(im.resize((8, 8), Image.ANTIALIAS).convert('L').getdata())
    mean = sum(pixels) / 64.

    bits = 0
    for position, value in enumerate(pixels):
        if value < mean:
            continue
        bits |= 1 << position
    return bits
def phash(im):
    """Return a 64-bit perceptual hash of an image.

    `im` is either a PIL image or something `Image.open` accepts. The image
    is shrunk to 32x32 greyscale and the 8x8 lowest-frequency coefficients of
    its (unnormalised) 2-D discrete cosine transform are computed. Bit i of
    the result is set unless the i-th coefficient is below the mean of all
    coefficients except the first one.
    """
    if not isinstance(im, Image.Image):
        im = Image.open(im)
    im = im.resize((32, 32), Image.ANTIALIAS).convert('L')

    def coefficient(u, v):
        # Cosine-transform term with horizontal frequency u and vertical
        # frequency v, summed over every pixel of the 32x32 image.
        return sum(im.getpixel((x, y)) *
                   math.cos(math.pi / 32 * (x + .5) * u) *
                   math.cos(math.pi / 32 * (y + .5) * v)
                   for x in range(32) for y in range(32))

    coeffs = []
    for v in range(8):
        for u in range(8):
            coeffs.append(coefficient(u, v))

    # The first coefficient (u == v == 0) is left out of the mean.
    rest = coeffs[1:]
    mean = sum(rest) / len(rest)

    bits = 0
    for position, value in enumerate(coeffs):
        if value < mean:
            continue
        bits |= 1 << position
    return bits
def hamming(h1, h2):
    """Return the Hamming distance between two integer hashes.

    The distance is the number of bit positions in which `h1` and `h2`
    differ, i.e. the number of 1 bits in ``h1 ^ h2``.
    """
    # bin() renders the magnitude of the XOR in base 2, so counting its '1'
    # digits is a population count. Unlike a clear-the-lowest-set-bit loop,
    # it also terminates when the XOR happens to be negative.
    return bin(h1 ^ h2).count('1')
def _compare(f, fs, algo=avhash, exts=EXTS, prog=PROG_SMART, thresh=64):
h, seq, x, y = algo(f), [], 1, len(fs)
for f in fs:
seq.append((hamming(algo(f), h), f))
_progress(x, y, prog)
x += 1
for t in sorted(seq, key=lambda i: i[0]):
if t[0] <= thresh:
print "%d\t%s" % t
def _expand_files(l):
    """Expand command-line arguments into a flat list of image paths.

    l -- list of file names, directory names or glob patterns; an empty
         list means the current directory.

    Directories contribute their direct entries matching ``*.<ext>`` for
    each extension in the global EXTS (no recursion). Anything else that
    exists is taken literally; only names that do not exist are treated as
    glob patterns. Returns the collected paths as a list.
    """
    if not l:
        l = ['.']
    images = []
    for entry in l:
        if os.path.isdir(entry):
            for ext in EXTS:
                images.extend(glob.glob(os.path.join(entry, '*.' + ext)))
        elif os.path.exists(entry):
            # Take an existing file as-is: handing it to glob would silently
            # drop names containing glob metacharacters such as "img[1].jpg".
            images.append(entry)
        else:
            images.extend(glob.glob(entry))
    return images
def _help():
    """Write the usage message to standard error.

    The message is formatted with the program name (sys.argv[0]) and the
    current comma-joined EXTS, and uses ANSI escapes for bold/underline.
    """
    text = u"""\
\033[1mUSAGE\033[0m
\033[1m%s\033[0m [\033[4mOPTION\033[0m]... \033[4mIMAGE\033[0m [\033[4mIMAGE\033[0m|\033[4mDIR\033[0m]...
\033[1mDESCRIPTION\033[0m
Examine images to see how similar they are to a target image.
The first image specified is the target image. Every subsequent image is
examined to see if it matches. You may also provide directories instead, in
which case we'll look for images there (though not recursively through
subdirectories).
Output is the list of images sorted by similarity, plus a number between
0 and 64 indicating similarity (0 being identical).
If no extra images are supplied, we'll just look in the current directory.
Mandatory arguments to long options are mandatory for short options too.
\033[1m-h\033[0m, \033[1m--help\033[0m
Print this message and exit.
\033[1m-a\033[0m, \033[1m--algo\033[0m[\033[1mrithm\033[0m]=\033[4mALGO\033[0m
Specify hashing algorithm. (\033[1maverage\033[0m or \033[1mphash\033[0m; default: \033[1maverage\033[0m)
\033[1m--average\033[0m, \033[1m--avhash\033[0m, \033[1m--avg\033[0m
Same as \033[1m--algo=average\033[0m.
\033[1m-e\033[0m, \033[1m--ext\033[0m[\033[1mension\033[0m]\033[1ms\033[0m=\033[4mEXTS\033[0m
Provide a comma-separated list of valid image extensions for directory
processing. (Ignored for images that are explicitly provided.)
Default: \033[1m%s\033[0m
\033[1m--phash\033[0m
Same as \033[1m--algo=phash\033[0m.
\033[1m-p\033[0m, \033[1m--progress\033[0m=\033[1my\033[0m[\033[1mes\033[0m]|\033[1mn\033[0m[\033[1mo\033[0m]|\033[1ms\033[0m[\033[1mmart\033[0m]
Display progress bar. (default: \033[1msmart\033[0m)
\033[1m-t\033[0m, \033[1m--threshold\033[0m=\033[4mN\033[0m
Display only images within Hamming distance \033[4mN\033[0m of target, where
0 \u2264 N \u2264 64. (Lower is closer match; default: 64)
\033[1m--only-matches\033[0m
Same as \033[1m--threshold=10\033[0m.
""" % (sys.argv[0], ','.join(EXTS))
    # Encode explicitly: when standard error is not a terminal it has no
    # encoding, and printing the unicode text (it contains U+2264) would
    # raise UnicodeEncodeError instead of showing the help.
    encoding = getattr(sys.stderr, 'encoding', None) or 'utf-8'
    print >> sys.stderr, text.encode(encoding, 'replace')
def _parseopt(argv):
    """Parse the command-line arguments in `argv`.

    argv -- argument list without the program name (e.g. sys.argv[1:]).

    Returns a tuple (algo, prog, thresh, target, images):
      algo   -- hashing function (avhash by default, or phash),
      prog   -- one of PROG_NO, PROG_YES, PROG_SMART,
      thresh -- maximum Hamming distance to report (default 64),
      target -- first positional argument, or None when there is none,
      images -- remaining positional arguments expanded by _expand_files.

    Side effects: -e/--exts/--extensions replaces the global EXTS;
    -h/--help prints the help and exits with status 0; an option getopt
    rejects is reported on standard error and exits with status 2. An
    invalid value for a recognised option is reported on standard error and
    the previous setting is kept.
    """
    global EXTS
    algo, prog, thresh = avhash, PROG_SMART, 64
    try:
        # Parse the list we were given rather than reaching for sys.argv.
        optlist, args = getopt.getopt(argv, 'ha:p:t:e:',
                                      ['help',
                                       'algo=', 'algorithm=',
                                       'average', 'avhash', 'avg',
                                       'phash',
                                       'progress=',
                                       'threshold=', 'only-matches',
                                       'exts=', 'extensions='])
    except getopt.GetoptError as e:
        print >> sys.stderr, e
        sys.exit(2)
    for opt, arg in optlist:
        # Drop any '=' from the value; presumably so that short options
        # written as "-a=phash" are accepted too.
        arg = arg.replace('=', '')
        if opt in ('--help', '-h'):
            _help()
            sys.exit(0)
        elif opt in ('--algo', '--algorithm', '-a'):
            if arg in ('avhash', 'average', 'avg'):
                algo = avhash
            elif arg == 'phash':
                algo = phash
            else:
                print >> sys.stderr, "Invalid algorithm: %s" % arg
        elif opt in ('--average', '--avhash', '--avg'):
            algo = avhash
        elif opt == '--phash':
            algo = phash
        elif opt in ('--progress', '-p'):
            if arg.lower() in ('yes', 'y', ''):
                prog = PROG_YES
            elif arg.lower() in ('no', 'n'):
                prog = PROG_NO
            elif arg.lower() in ('smart', 's', 'auto'):
                prog = PROG_SMART
            else:
                print >> sys.stderr, "Invalid option: progress=%s" % arg
        elif opt in ('--threshold', '-t'):
            try:
                thresh = int(arg)
            except ValueError:
                print >> sys.stderr, "Invalid option: threshold=%s" % arg
        elif opt in ('--exts', '--extensions', '-e'):
            EXTS = arg.split(',')
        elif opt == '--only-matches':
            thresh = 10
    if not args:
        args = [None]
    # EXTS must be final before the directory arguments are expanded.
    return algo, prog, thresh, args[0], _expand_files(args[1:])
def _progress(x, y, prog=PROG_YES):
if prog == PROG_YES or (prog == PROG_SMART and y > 10 and sys.stdout.isatty()):
perc, w = 100. * x / y, int(40. * x / y)
print '\rCalculating... [' + '#' * w + ' ' * (40 - w) + ']',
print '%.2f%%' % perc, '(%d/%d)' % (x, y),
if x == y: print
sys.stdout.flush()
if __name__ == '__main__':
    # Script entry point: compare against the target image when one was
    # given, otherwise show the usage text and exit with status 1.
    hash_fn, progress_mode, threshold, target, candidates = \
        _parseopt(sys.argv[1:])
    if target:
        _compare(target, candidates,
                 algo=hash_fn, prog=progress_mode, thresh=threshold)
    else:
        _help()
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment