Skip to content

Instantly share code, notes, and snippets.

@isagalaev
Forked from bobuk/img.py
Last active December 11, 2015 05:48
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save isagalaev/4554182 to your computer and use it in GitHub Desktop.
Save isagalaev/4554182 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
import os
import random
import glob
from PIL import Image
import numpy
# Side length, in pixels, of the square thumbnail each image is resized to
# before comparison (used by image_data).
BLOCK_SIZE = 20
# Per-element difference above which two thumbnails count as differing at
# that position (used by distance).
THRESHOLD = 60
# Value written into the width attribute of every <img> tag in the report.
WIDTH = 200
# Images whose distance to the reference image is below this value are put
# in the same group (used by duplicates).
MAX_DISTANCE = 220
def image_data(filename):
    """
    Get data from image ready for comparison.

    The image is converted to RGB, shrunk to a BLOCK_SIZE x BLOCK_SIZE
    thumbnail and reduced to one number per pixel: the sum of its R, G and B
    channel values.

    Args:
        filename: Path of an image file that PIL can open.

    Returns:
        A 1-D numpy integer array with BLOCK_SIZE * BLOCK_SIZE entries, one
        per thumbnail pixel, in row-major order.
    """
    # The context manager releases the file handle deterministically instead
    # of relying on PIL / the garbage collector to do it later.
    with Image.open(filename) as img:
        # Normalise to RGB first: single-band images (e.g. grayscale JPEGs)
        # yield plain ints per pixel, which made the per-pixel sum() fail,
        # and other modes have a different number of bands, which made the
        # sums incomparable between files.
        thumb = img.convert('RGB').resize(
            (BLOCK_SIZE, BLOCK_SIZE), Image.BILINEAR
        )
    # A signed integer dtype keeps later array subtraction from wrapping
    # around the way uint8 arithmetic would.
    return numpy.asarray(thumb, dtype=int).sum(axis=2).ravel()
def distance(data1, data2, threshold=None):
    """
    Logical distance between two images.

    The distance is the number of positions at which the two data arrays
    differ by more than ``threshold``; it therefore ranges from 0 to the
    number of elements (400 for the default 20x20 thumbnails).

    Args:
        data1: Array-like of per-pixel values for the first image.
        data2: Array-like of per-pixel values for the second image; must be
            broadcastable against ``data1``.
        threshold: Absolute difference above which a position counts as
            different. Defaults to the module-level THRESHOLD.

    Returns:
        The count of differing positions as a plain ``int``.
    """
    if threshold is None:
        threshold = THRESHOLD
    # Convert to float so that unsigned integer inputs cannot wrap around on
    # subtraction (0 - 5 on uint16 would otherwise become 65531 and be
    # counted as a large difference). Plain sequences are accepted too.
    first = numpy.asarray(data1, dtype=float)
    second = numpy.asarray(data2, dtype=float)
    # Vectorised comparison instead of a Python-level loop over the array.
    return int(numpy.count_nonzero(numpy.abs(first - second) > threshold))
def duplicates(dirname):
    """
    Find groups of similar-looking images in a directory.

    Only files matching ``*.jpg`` directly inside ``dirname`` are examined.
    The images are processed in random order. For every image, one group is
    yielded: a sorted list of ``(distance, filename)`` tuples covering each
    image (the reference image itself is among the candidates) whose
    distance to the reference is below MAX_DISTANCE.
    """
    pattern = os.path.join(dirname, '*.jpg')
    images = [(path, image_data(path)) for path in glob.glob(pattern)]
    random.shuffle(images)
    for _, reference in images:
        group = []
        for path, candidate in images:
            score = distance(reference, candidate)
            if score < MAX_DISTANCE:
                group.append((score, path))
        # Closest matches first; ties are ordered by filename.
        group.sort()
        yield group
def html_group(group, width=None):
    """
    Render one group of images as a run of ``<img>`` tags.

    Args:
        group: Iterable of ``(distance, filename)`` tuples.
        width: Value for the ``width`` attribute of every tag. Defaults to
            the module-level WIDTH.

    Returns:
        A string with one ``<img>`` tag per entry, each followed by its
        distance. The ``src`` is the HTML-escaped base name of the file.
    """
    # Imported locally because the module-level name ``html`` in this file
    # is the report-building function, not the standard-library module.
    from html import escape

    if width is None:
        width = WIDTH
    tmpl = '<img src="%s" width="%s"/>%s'
    # Escape the name so characters such as '"', '&' or '<' in a file name
    # cannot break out of the src attribute.
    return ''.join(
        tmpl % (escape(os.path.basename(f), quote=True), width, dist)
        for dist, f in group
    )
def html(groups):
    """
    Build a complete HTML page from groups of image duplicates.

    Each group is rendered with html_group; the rendered groups are
    separated by ``<hr/>`` and the body ends with a final ``<hr/>``.
    """
    sections = []
    for group in groups:
        sections.append(html_group(group))
    return '<html><body>' + '<hr/>'.join(sections) + '<hr/></body></html>'
if __name__ == '__main__':
    import sys

    # Directory to scan: the first command-line argument if given, otherwise
    # the original hard-coded location, so running with no arguments behaves
    # exactly as before.
    if len(sys.argv) > 1:
        target_dir = sys.argv[1]
    else:
        target_dir = '/home/maniac/Desktop/4554182'
    print(html(duplicates(target_dir)))
@kataev
Copy link

kataev commented Oct 23, 2013

def html_group(group):
    return ''.join(
            '<img src="%s" width="%s"/>%s' % (os.path.basename(f), WIDTH, dist)
            for dist, f in group
        )

Я думаю, можно заменить на

def html_group(group):
    tmpl = '<img src="%s" width="%s"/>%s' 
    return ''.join(tmpl % (os.path.basename(f), WIDTH, dist) for dist, f in group)

Что улучшит читаемость и сократит код на 2 строки.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment