Skip to content

Instantly share code, notes, and snippets.

@pavelkryukov
Last active September 28, 2019 10:52
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pavelkryukov/15f93d19a99428a284a8bcec27e0187b to your computer and use it in GitHub Desktop.
Save pavelkryukov/15f93d19a99428a284a8bcec27e0187b to your computer and use it in GitHub Desktop.
This script finds local versions of images fetched by InstaLoader.
#!/usr/bin/python
# Copyright (C) Pavel Kryukov 2017
from PIL import Image
import imagehash
import os
import shelve
import shutil
import re
import argparse
from tqdm import tqdm
def is_image_filename(f):
    """Return True if filename *f* has a JPEG extension.

    The check is case-insensitive, so '.jpg', '.JPG' and mixed-case
    spellings such as '.Jpeg' are all accepted.
    """
    # Lower-case the extension instead of enumerating every case variant.
    return os.path.splitext(f)[1].lower() in ('.jpg', '.jpeg', '.jpe')
def update_database(path, db_name):
    """Add hashes of new images found under *path* to the shelve database.

    Walks ``path`` recursively, computes ``imagehash.dhash`` for every JPEG
    file that is not yet a key in the database, and stores the hash keyed by
    the file's path. Files that cannot be opened or hashed are reported and
    skipped.

    Args:
        path: Root directory of the local (source) images.
        db_name: Filename of the shelve database to create or update.

    Raises:
        IOError: If the database cannot be opened (reported, then re-raised).
    """
    sources = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(path)
        for name in filenames
        if is_image_filename(name)
    ]
    try:
        # No writeback cache: entries are only ever assigned whole, never
        # mutated in place, so caching them would just duplicate the writes.
        db = shelve.open(db_name)
    except IOError as e:
        # Single-argument print(...) runs unchanged on Python 2 and 3.
        print("Unable to open database: " + str(e))
        raise  # bare raise keeps the original traceback
    try:
        if len(db) == 0:
            print("Warning: empty database")
        for filename in tqdm(sources):
            if filename in db:
                continue
            try:
                image = Image.open(filename)
            except IOError as e:
                print("Unable to open file: " + str(e))
                continue
            try:
                h = imagehash.dhash(image)
            except IOError as e:
                print("Corrupted image file: " + str(e))
                continue
            db[filename] = h  # TODO pkryukov: this should be indexed by md5 of file
    finally:
        # The original referenced `db.close` without calling it, so the
        # database was never explicitly closed.
        db.close()
def lookup_database(path, db_name, max_distance=4):
    """Copy the closest-matching local image next to each fetched image.

    For every JPEG under ``path`` that does not yet have a ``_better``
    sibling, the database entry whose dhash is nearest to the image's dhash
    is looked up. If the two hashes differ by at most ``max_distance`` bits
    and the matched file still exists, it is copied to
    ``<name>_better<ext>`` beside the fetched image.

    Args:
        path: Root directory of the fetched (target) images.
        db_name: Filename of the shelve database written by update_database.
        max_distance: Largest hash difference, in bits, still treated as the
            same picture. A small non-zero value can lead to funny
            collisions :-)

    Raises:
        IOError: If the database cannot be opened (reported, then re-raised).
    """
    suffix = '_better'
    instagrams = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(path)
        for name in filenames
        if is_image_filename(name) and suffix not in name
    ]
    try:
        db = shelve.open(db_name)
    except IOError as e:
        # Single-argument print(...) runs unchanged on Python 2 and 3.
        print("Unable to open database: " + str(e))
        raise  # bare raise keeps the original traceback
    try:
        # Load every stored hash once, rather than unpickling the whole
        # database again for each fetched image.
        known = list(db.items())
    finally:
        db.close()
    if not known:
        # min() over an empty sequence would raise ValueError.
        print("Warning: empty database")
        return
    for filename in tqdm(instagrams):
        # Insert the suffix before the real extension. A ".jpg" regex has an
        # unescaped dot and leaves '.jpeg'/'.JPG' names unchanged, which made
        # such files look like they already had a better version.
        root, ext = os.path.splitext(filename)
        better_file = root + suffix + ext
        if os.path.isfile(better_file):
            continue
        try:
            h = imagehash.dhash(Image.open(filename))
        except IOError as e:
            print("Unable to open file: " + str(e))
            continue
        best, best_hash = min(known, key=lambda item: item[1] - h)
        if best_hash - h <= max_distance and os.path.isfile(best):
            shutil.copy2(best, better_file)
def main():
    """Parse the command line, refresh the hash database, then run the lookup."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--sources", required=True,
                        help="path to local images")
    parser.add_argument("-t", "--targets", required=True,
                        help="path to fetched images")
    parser.add_argument("-d", "--database", required=True,
                        help="shelve database file")
    options = parser.parse_args()
    database = options.database
    # Hash the local images first so the lookup sees an up-to-date database.
    update_database(options.sources, database)
    lookup_database(options.targets, database)
# Run only when executed as a script, so that importing this module does not
# start argument parsing and database work as a side effect.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment