Skip to content

Instantly share code, notes, and snippets.

@arkarkark
Created November 4, 2017 10:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arkarkark/6439c585f104c3f95d38ddbaed103662 to your computer and use it in GitHub Desktop.
Save arkarkark/6439c585f104c3f95d38ddbaed103662 to your computer and use it in GitHub Desktop.
Given an array of duplicate image files, make montage images and then find their Photos.app ids.
#!/usr/bin/python
"""
./processdupes.py
Loads `dupes.json` and makes montage images so you can check the dupes,
then finds the ids of those images so they can later be added to albums for deletion.
Edit the commented-out calls in Main() at the end of the file to change behavior.
"""
import fileinput
import json
import os
import re
import sqlite3
import subprocess
import sys
# pylint: disable=missing-docstring,no-self-use,too-few-public-methods
PROGRESS_COUNT = 0
PROGRESS_OPTIONS = list('/-\\|/-\\|')


def Progress(handler):
    """Draw one spinner frame and, on every 100th call, a status line.

    `handler` only needs a `status()` method; it is consulted solely on calls
    where the running call count has just become a multiple of 100.
    """
    global PROGRESS_COUNT  # pylint: disable=global-statement
    frame = PROGRESS_OPTIONS[PROGRESS_COUNT % len(PROGRESS_OPTIONS)]
    # The trailing carriage return parks the cursor at column 0 so the next
    # frame overwrites this one instead of scrolling.
    sys.stdout.write("%s\r" % frame)
    PROGRESS_COUNT += 1
    if PROGRESS_COUNT % 100 == 0:
        sys.stdout.write("\r %s %s\r" % (PROGRESS_COUNT, handler.status()))
    sys.stdout.flush()
class MakeContactSheets(object):
    """Make a contact sheet for each dupe.

    Each call to `process` runs the `montage` command over one group of
    duplicate files and then opens the resulting image with `open`.
    `status` and `done` are provided so an instance can be passed anywhere a
    handler with those methods is expected.
    """

    def __init__(self):
        # Output file names of the montages requested so far.
        self.sheets = []

    def process(self, dupe):
        """Build and open one montage for the files listed in `dupe`.

        The output name is the first path in `dupe` with every character
        other than ASCII letters, digits and '.' removed, so it is written
        relative to the current directory.
        """
        file_name = re.sub(r'[^a-z0-9A-Z\.]+', '', dupe[0])
        # Arguments are listed explicitly: subprocess passes them to montage
        # verbatim (no shell), so shell-style quotes would become part of the
        # values and splitting a string on " " can produce empty arguments.
        cmd = ["montage"] + list(dupe) + [
            "-verbose",
            "-label", "%f",
            "-pointsize", "10",
            "-background", "#000000",
            "-fill", "gray",
            "-define", "jpeg:size=200x200",
            "-geometry", "200x200+2+2",
            "-auto-orient",
            file_name,
        ]
        print(" ".join(cmd))
        subprocess.call(cmd)
        subprocess.call(["open", file_name])
        self.sheets.append(file_name)

    def status(self):
        """Return a one-line summary of how many montages were requested."""
        return "%d contact sheets made" % len(self.sheets)

    def done(self):
        """Print the final status line; nothing else needs finishing."""
        sys.stdout.write("\n%s\n" % self.status())
class FindIds(object):
    """Find Id's in photos db.

    Maps file paths to uuids by querying the RKMaster table (by imagePath)
    and then the RKVersion table (by masterUuid) of `photos.db`.

    Attributes:
        ids: dict of basename -> list of RKVersion uuids found for it.
        errors: list of dicts describing files that could not be resolved
            or queries that returned more than one row.
    """

    def __init__(self):
        self.conn = sqlite3.connect('photos.db')
        self.cursor = self.conn.cursor()
        self.ids = {}
        self.errors = []

    def _lookup(self, query, param, found_key, context):
        """Run a one-parameter query and return the first column of row one.

        Returns None when the query yields no rows. Every row after the
        first is recorded in `errors` as `context` plus the value found
        (under `found_key`) and the unexpected value (under "extra_row").
        """
        self.cursor.execute(query, (param,))
        first = self.cursor.fetchone()
        if first is None:
            return None
        value = first[0]
        for extra in self.cursor.fetchall():
            error = dict(context)
            error[found_key] = value
            error["extra_row"] = extra[0]
            self.errors.append(error)
        return value

    def process(self, dupe, add_all_files=False):
        """Record the version uuid of each file in `dupe`.

        Args:
            dupe: list of paths; the first two "/"-separated components of
                each are dropped before the database lookup.
            add_all_files: when False, one path of the group is left out
                of the lookup (see the note below on which one).
        """
        filenames = sorted(dupe, key=len)
        if not add_all_files and filenames:
            # Drops the LONGEST path, so that file is the one left out of ids.
            # NOTE(review): the original comment here said "keep the
            # shortest", which does not match what pop() does on an
            # ascending sort -- confirm which file is meant to survive.
            filenames.pop()
        for filename in filenames:
            # strip off libraryname.photoslibrary/Masters
            filename = "/".join(filename.split("/")[2:])
            uuid = self._lookup(
                "SELECT uuid from RKMaster where imagePath=?",
                filename, "uuid", {"filename": filename})
            if uuid is None:
                self.errors.append({"filename": filename})
                continue
            photo_uuid = self._lookup(
                "SELECT uuid from RKVersion where masterUuid=?",
                uuid, "photo_uuid", {"filename": filename, "uuid": uuid})
            if photo_uuid is None:
                # Master row exists but has no version row: report it rather
                # than dropping the file without a trace.
                self.errors.append({"filename": filename, "uuid": uuid})
                continue
            basename = os.path.basename(filename)
            self.ids.setdefault(basename, []).append(photo_uuid)

    def error(self, error):
        """Append a caller-supplied error entry to `errors`."""
        self.errors.append(error)

    def status(self):
        """Return a one-line count of distinct basenames found and errors."""
        return "%d ids found, %d errors" % (len(self.ids), len(self.errors))

    def done(self):
        """Write `ids` and `errors` to ids.json and print the final status."""
        with open("ids.json", "w") as out:
            json.dump({
                "ids": self.ids,
                "errors": self.errors
            }, out, sort_keys=True, indent=4, separators=(',', ': '))
        sys.stdout.write("\n%s\n" % self.status())
def Process(dupes, handler):
    """Run `handler.process` on every entry of `dupes`, then `handler.done()`.

    The progress display is ticked once after each entry is handled.
    """
    for group in dupes:
        handler.process(group)
        Progress(handler)
    handler.done()
def FindSimilarSmallFiles():
    """Read through input files looking for ones with small, sml, thumb or thumbnail or whatnot in the name.

    See if a similar file exists without that in it and make a montage and add it to a dupes album.
    `find all.photoslibrary/Masters -type f >allfiles.txt`

    Paths are read one per line via `fileinput`. For each path containing one
    of `file_bits`, the counterpart is the same path with that bit replaced by
    '.jpg'; it is matched either on disk or by basename against paths seen
    earlier or later in the input. Matched pairs are handed to a FindIds
    handler and finally opened in Preview, 20 files at a time.
    """
    # file_bits = ['_sml', '.thumb', '.shad', '_shad_sml']
    file_bits = ['.tif']
    handler = FindIds()
    all_files = {}  # key: basename, value: full_path
    still_looking_for = {}  # key: basename, value: full_path
    to_montage = []
    for line in fileinput.input():
        full_path = line.strip()
        for bit in file_bits:
            if bit not in full_path:
                continue
            other_file = full_path.replace(bit, '.jpg')
            other_base = os.path.basename(other_file)
            if os.path.exists(other_file):
                to_montage.extend([full_path, other_file])
                handler.process([other_file, full_path], add_all_files=True)
            elif other_base in all_files:
                # Counterpart lives elsewhere but was already seen in the input.
                to_montage.extend([full_path, all_files[other_base]])
                handler.process([full_path, all_files[other_base]], add_all_files=True)
            else:
                # Not seen yet; remember it in case it shows up on a later line.
                still_looking_for[other_base] = full_path
        full_base = os.path.basename(full_path)
        all_files[full_base] = full_path
        if full_base in still_looking_for:
            to_montage.extend([still_looking_for[full_base], full_path])
            handler.process([still_looking_for[full_base], full_path], add_all_files=True)
        Progress(handler)
    handler.done()
    print("\nMontage this:")
    # Pass paths as separate arguments instead of through a shell string so
    # spaces or shell metacharacters in file names cannot break the command.
    for start in range(0, len(to_montage), 20):
        subprocess.call(["open", "-a", "preview.app"] + to_montage[start:start + 20])
def LoadDupes():
    """Load `dupes.json` and run every entry through the selected handler.

    Swap which `handler = ...` line is commented out to choose between
    making contact sheets and looking up ids.
    """
    # Close the file as soon as it is parsed instead of leaking the handle.
    with open("dupes.json", "r") as dupes_file:
        dupes = json.load(dupes_file)
    # handler = MakeContactSheets()
    handler = FindIds()
    Process(dupes, handler)
def MakeNewJpgFromTiff():
    """Convert listed files whose basename appears in `tiff.json` to .jpg.

    `tiff.json` must have a top-level "ids" entry; each path read via
    `fileinput` is converted with the `convert` command when its basename is
    a member of that entry. The output is written to the current directory
    under the same basename with a ".jpg" extension.
    """
    # Close the file as soon as it is parsed instead of leaking the handle.
    with open("tiff.json", "r") as tiff_file:
        ids = json.load(tiff_file)["ids"]
    for line in fileinput.input():
        file_name = line.strip()
        base_name = os.path.basename(file_name)
        if base_name not in ids:
            continue
        cmd = [
            "convert",
            file_name,
            os.path.splitext(base_name)[0] + ".jpg"
        ]
        print(" ".join(cmd))
        subprocess.call(cmd)
def Main():
    """Run the enabled step; move the comment markers to pick another one."""
    LoadDupes()
    # FindSimilarSmallFiles()
    # MakeNewJpgFromTiff()


if __name__ == '__main__':
    Main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment