Created
November 4, 2017 10:57
Given an array of duplicate image files, make montage images and then find their Photos.app ids.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
./processdupes.py

Loads `dupes.json` and makes montage images so you can check the dupes,
then finds the ids of those images for later addition to albums for deletion.

To change which step runs, edit the commented-out lines at the end of the file.
"""
import fileinput | |
import json | |
import os | |
import re | |
import sqlite3 | |
import subprocess | |
import sys | |
# pylint: disable=missing-docstring,no-self-use,too-few-public-methods | |
# Number of Progress() calls made so far.
PROGRESS_COUNT = 0
# Spinner frames; one is written per Progress() call, cycling through the list.
PROGRESS_OPTIONS = list('/-\\|/-\\|')


def Progress(handler):
    """Advance the console spinner and periodically print the handler status.

    Every call writes the next spinner frame followed by a carriage return
    and increments the module-level ``PROGRESS_COUNT``.  On every 100th call
    the running count and ``handler.status()`` are written as well.

    Args:
        handler: object exposing a ``status()`` method that returns a string;
            it is only consulted on every 100th call.
    """
    global PROGRESS_COUNT  # pylint: disable=global-statement
    frame = PROGRESS_OPTIONS[PROGRESS_COUNT % len(PROGRESS_OPTIONS)]
    sys.stdout.write("%s\r" % frame)
    PROGRESS_COUNT += 1
    if PROGRESS_COUNT % 100 == 0:
        sys.stdout.write("\r %s %s\r" % (PROGRESS_COUNT, handler.status()))
    # Flush on every call: the output never contains a newline, so buffered
    # stdout would otherwise hold the spinner frames back.
    sys.stdout.flush()
class MakeContactSheets(object):
    """Make a contact sheet for each dupe.

    Handler for ``Process``: every group of duplicate paths is rendered into
    one image by the external ``montage`` command, and the result is then
    handed to the ``open`` command.
    """

    # Everything except ASCII letters, digits and dots is removed when the
    # output file name is derived from the first path of a group.
    _UNSAFE_CHARS = re.compile(r'[^a-z0-9A-Z\.]+')

    def __init__(self):
        # Number of montage runs that exited with status 0.
        self.sheets = 0

    def _build_command(self, dupe):
        """Return ``(argv, output_file_name)`` for one group of duplicates.

        The options are passed as separate argv entries without shell
        quoting: no shell is involved, so quote characters would reach
        ``montage`` literally (e.g. as part of the colour name).
        """
        file_name = self._UNSAFE_CHARS.sub('', dupe[0])
        cmd = ["montage"] + list(dupe) + [
            "-verbose",
            "-label", "%f",
            "-pointsize", "10",
            "-background", "#000000",
            "-fill", "gray",
            "-define", "jpeg:size=200x200",
            "-geometry", "200x200+2+2",
            "-auto-orient",
            file_name,
        ]
        return cmd, file_name

    def process(self, dupe):
        """Build and open the contact sheet for one group of duplicate paths.

        Args:
            dupe: list of image paths; the first one determines the name of
                the output file.  An empty list is ignored.
        """
        if not dupe:
            return
        cmd, file_name = self._build_command(dupe)
        print(" ".join(cmd))
        # Only open the sheet when montage reported success; otherwise there
        # is no fresh output file to show.
        if subprocess.call(cmd) == 0:
            self.sheets += 1
            subprocess.call(["open", file_name])

    def status(self):
        """Return a one-line summary; Progress() calls this on its handler."""
        return "%d contact sheets made" % self.sheets

    def done(self):
        """Print the final summary; Process() calls this on its handler."""
        print("\n" + self.status())
class FindIds(object):
    """Find ids in the photos db.

    Handler for ``Process``: for each duplicate path it looks up the master
    uuid in ``RKMaster`` (by ``imagePath``) and then the uuid in ``RKVersion``
    (by ``masterUuid``).  Found uuids are collected in ``self.ids`` keyed by
    file basename; anything unexpected is collected in ``self.errors``.
    ``done()`` writes both to a JSON file.
    """

    def __init__(self, db_path='photos.db', output_path='ids.json'):
        """Open the database.

        Args:
            db_path: SQLite database to query (default ``photos.db``).
            output_path: file that ``done()`` writes (default ``ids.json``).
        """
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.output_path = output_path
        self.ids = {}      # key: basename, value: list of RKVersion uuids
        self.errors = []   # list of dicts describing lookups that went wrong

    def _lookup(self, sql, param, context, value_key):
        """Run a one-parameter query and return the first column of row one.

        Returns None when the query yields no rows.  Every row beyond the
        first is recorded in ``self.errors`` as ``context`` plus the returned
        value under ``value_key`` and the surplus value under ``extra_row``.
        """
        self.cursor.execute(sql, (param,))
        rows = self.cursor.fetchall()
        if not rows:
            return None
        value = rows[0][0]
        for extra in rows[1:]:
            error = dict(context)
            error[value_key] = value
            error["extra_row"] = extra[0]
            self.errors.append(error)
        return value

    def process(self, dupe, add_all_files=False):
        """Look up the ids for one group of duplicate paths.

        Args:
            dupe: list of paths; the first two ``/``-separated components of
                each are dropped before it is matched against
                ``RKMaster.imagePath``.
            add_all_files: when False (default) one path of the group is left
                out of the lookup; when True every path is looked up.
        """
        filenames = sorted(dupe, key=len)
        if filenames and not add_all_files:
            # pop() drops the last entry of the ascending-by-length list, so
            # the longest path is the one that is not looked up.
            # NOTE(review): the original inline comment said "keep the
            # shortest", which does not match this -- confirm which file is
            # meant to be spared before using the ids for deletion.
            filenames.pop()
        for filename in filenames:
            # strip off libraryname.photoslibrary/Masters
            filename = "/".join(filename.split("/")[2:])
            uuid = self._lookup(
                "SELECT uuid from RKMaster where imagePath=?",
                filename, {"filename": filename}, "uuid")
            if uuid is None:
                self.errors.append({"filename": filename})
                continue
            photo_uuid = self._lookup(
                "SELECT uuid from RKVersion where masterUuid=?",
                uuid, {"filename": filename, "uuid": uuid}, "photo_uuid")
            if photo_uuid is None:
                # Record the miss instead of storing None among the ids.
                self.errors.append({"filename": filename, "uuid": uuid})
                continue
            basename = os.path.basename(filename)
            self.ids.setdefault(basename, []).append(photo_uuid)

    def error(self, error):
        """Append an externally detected error entry to ``self.errors``."""
        self.errors.append(error)

    def status(self):
        """Return a one-line summary of how many ids and errors were found."""
        return "%d ids found, %d errors" % (len(self.ids), len(self.errors))

    def done(self):
        """Write ids and errors as JSON to ``output_path`` and print status."""
        with open(self.output_path, "w") as out:
            json.dump({
                "ids": self.ids,
                "errors": self.errors
            }, out, sort_keys=True, indent=4, separators=(',', ': '))
        print("\n" + self.status())
def Process(dupes, handler):
    """Feed every duplicate group to ``handler`` and then finalise it.

    Args:
        dupes: iterable of duplicate groups; each one is passed unchanged to
            ``handler.process``.
        handler: object providing ``process(group)`` and ``done()``; it is
            also handed to ``Progress`` after each group.
    """
    for group in dupes:
        handler.process(group)
        Progress(handler)
    handler.done()
def FindSimilarSmallFiles(file_bits=None):
    """Pair files whose name contains a marker with their ``.jpg`` counterpart.

    Reads paths, one per line, from ``fileinput`` (files named on the command
    line, or stdin).  A suitable listing can be produced with
    `find all.photoslibrary/Masters -type f >allfiles.txt`.

    For every path containing one of ``file_bits`` the marker is replaced by
    ``.jpg``.  If that file exists on disk, or a file with the same basename
    appears anywhere in the listing (before or after), the pair is passed to
    ``FindIds`` and queued for viewing.  At the end the ids are written via
    ``FindIds.done()`` and the queued files are opened in Preview, 20 at a
    time.

    Args:
        file_bits: list of substrings marking the variant files, e.g.
            ``['_sml', '.thumb', '.shad', '_shad_sml']``.  Defaults to
            ``['.tif']``.
    """
    if file_bits is None:
        file_bits = ['.tif']
    handler = FindIds()
    all_files = {}          # key: basename, value: full_path
    still_looking_for = {}  # key: basename of wanted .jpg, value: full_path wanting it
    to_montage = []
    for line in fileinput.input():
        full_path = line.strip()
        if not full_path:
            continue  # blank lines carry no path
        for bit in file_bits:
            if bit not in full_path:
                continue
            other_file = full_path.replace(bit, '.jpg')
            other_base = os.path.basename(other_file)
            if os.path.exists(other_file):
                to_montage.extend([full_path, other_file])
                handler.process([other_file, full_path], add_all_files=True)
            elif other_base in all_files:
                to_montage.extend([full_path, all_files[other_base]])
                handler.process([full_path, all_files[other_base]], add_all_files=True)
            else:
                # The counterpart may still show up later in the listing.
                still_looking_for[other_base] = full_path
        full_base = os.path.basename(full_path)
        all_files[full_base] = full_path
        if full_base in still_looking_for:
            to_montage.extend([still_looking_for[full_base], full_path])
            handler.process([still_looking_for[full_base], full_path], add_all_files=True)
        Progress(handler)
    handler.done()
    print("\nMontage this:")
    # Pass the paths as an argument list rather than a shell string so that
    # spaces or shell metacharacters in file names cannot break the command.
    for start in range(0, len(to_montage), 20):
        subprocess.call(["open", "-a", "preview.app"] + to_montage[start:start + 20])
def LoadDupes():
    """Load the duplicate groups from ``dupes.json`` and run a handler on them.

    The file is read from the current working directory and its parsed
    content is passed to ``Process`` together with a ``FindIds`` handler.
    """
    # Use a context manager so the file handle is closed once parsed.
    with open("dupes.json", "r") as dupes_file:
        dupes = json.load(dupes_file)
    # Swap the two handler lines to build contact sheets instead of finding ids.
    # handler = MakeContactSheets()
    handler = FindIds()
    Process(dupes, handler)
def MakeNewJpgFromTiff():
    """Convert listed files whose basename appears in ``tiff.json`` to JPEG.

    Reads the ``"ids"`` mapping from ``tiff.json`` in the current working
    directory, then reads paths, one per line, from ``fileinput``.  For each
    path whose basename is a key of that mapping, the external ``convert``
    command is run, writing ``<basename without extension>.jpg`` into the
    current working directory.
    """
    # Use a context manager so the file handle is closed once parsed.
    with open("tiff.json", "r") as ids_file:
        ids = json.load(ids_file)["ids"]
    for line in fileinput.input():
        file_name = line.strip()
        base_name = os.path.basename(file_name)
        if base_name not in ids:
            continue
        cmd = [
            "convert",
            file_name,
            os.path.splitext(base_name)[0] + ".jpg",
        ]
        print(" ".join(cmd))
        subprocess.call(cmd)
def Main():
    """Run the currently selected step of the workflow.

    Only ``LoadDupes`` is active; the other steps are kept as comments and
    can be enabled by moving the comment marker.
    """
    # FindSimilarSmallFiles()
    # MakeNewJpgFromTiff()
    LoadDupes()


if __name__ == '__main__':
    Main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment