Skip to content

Instantly share code, notes, and snippets.

@VinDuv
Created November 20, 2021 19:59
Show Gist options
  • Save VinDuv/e97377c9d9c3f7093ad26a41ce819319 to your computer and use it in GitHub Desktop.
Save VinDuv/e97377c9d9c3f7093ad26a41ce819319 to your computer and use it in GitHub Desktop.
#!/usr/bin/swift
/*
* Apple Photos duplicate remover
*
* This script identifies exact duplicates in the default Apple Photos library, and removes them.
*
* It uses the standard Apple Photos API so there is no risk of corrupting the library.
* When duplicates are found, it creates two albums with the duplicates to be kept and the ones to be removed
* so you can check the results.
*/
import CryptoKit
import Foundation
import Photos
/// Single-line textual progress indicator written to standard output.
///
/// The line has the form `[ NN%] message` and is redrawn in place (using a
/// carriage return) each time the integer percentage changes.
class ProgressInfo {
    /// Creates the indicator and immediately draws it at 0%.
    /// - Parameters:
    ///   - message: Text displayed after the percentage.
    ///   - totalCount: Number of work units that corresponds to 100%.
    init(_ message: String, totalCount: Int) {
        self.message = message
        self.totalCount = totalCount
        self.curCount = 0
        self.progress = 0
        self.update()
    }

    /// Records `count` additional completed work units and redraws the line
    /// if the integer percentage changed.
    /// - Parameter count: Number of units completed since the last call (1 by default).
    func incr(_ count: Int = 1) {
        curCount += count

        // With a zero (or negative) total there is no meaningful percentage,
        // and the integer division below would trap; wait for `done()`.
        guard totalCount > 0 else { return }

        // Clamp so that over-reporting never displays more than 100%.
        let newProgress = min(100, curCount * 100 / totalCount)
        if newProgress != progress {
            progress = newProgress
            update()
        }
    }

    /// Forces the display to 100% and terminates the progress line with a newline.
    func done() {
        progress = 100
        update()
        print("")
    }

    /// Redraws the progress line in place and flushes stdout so that it is
    /// visible even though no newline has been written.
    private func update() {
        let formatted = String(format: "\r[%3d%%] %@", progress, message)
        print(formatted, terminator: "")
        fflush(stdout)
    }

    /// Text displayed after the percentage.
    private var message: String
    /// Number of work units that corresponds to 100%.
    private var totalCount: Int
    /// Work units reported so far through `incr`.
    private var curCount: Int
    /// Percentage currently displayed.
    private var progress: Int
}
// Main run loop. MediaItem.getLocalItems() spins it manually while it waits
// for its asynchronous Photos requests to complete.
let mainLoop = RunLoop.main
/// A photo or video of the Photos library together with the local file that backs it.
struct MediaItem {
    /// The Photos asset this item was resolved from.
    let asset: PHAsset
    /// Local file URL returned by Photos for the asset's data.
    let url: URL

    /// Size of the backing file in bytes.
    ///
    /// Traps if the file's resource values cannot be read or carry no file size.
    var size: Int {
        return try! url.resourceValues(forKeys: [.fileSizeKey]).fileSize!
    }

    /// MD5 digest of the backing file's contents.
    ///
    /// The file is memory-mapped when it is safe to do so, so that hashing a
    /// large video does not require loading it entirely into memory.
    /// Traps if the file cannot be read.
    var checksum: Insecure.MD5Digest {
        return try! Insecure.MD5.hash(data: Data(contentsOf: url, options: .mappedIfSafe))
    }

    /// Returns `true` when both items' backing files have byte-for-byte identical contents.
    ///
    /// Both files are memory-mapped when safe instead of being read into memory.
    /// Traps if either file cannot be read.
    func sameAs(_ other: Self) -> Bool {
        return try! Data(contentsOf: url, options: .mappedIfSafe)
            == Data(contentsOf: other.url, options: .mappedIfSafe)
    }

    /// Enumerates the photo library and returns every image or video asset
    /// for which Photos reports a local file URL.
    ///
    /// The Photos requests are asynchronous; this function blocks by spinning
    /// the main run loop (`mainLoop`) until all of them have completed, so it
    /// is meant to be called from the main thread. Progress is reported on
    /// standard output while it runs.
    /// - Returns: One `MediaItem` per asset that resolved to a file URL.
    static func getLocalItems() -> [Self] {
        let assets = PHAsset.fetchAssets(with: PHFetchOptions())
        let assetCount = assets.count

        let editOpts = PHContentEditingInputRequestOptions()
        // Tell Photos that this script cannot interpret any adjustment data.
        editOpts.canHandleAdjustmentData = { _ in false }

        let videoOpts = PHVideoRequestOptions()
        videoOpts.version = .original

        let progress = ProgressInfo("Analyzing library…", totalCount: assetCount)
        var mediaItems: [Self] = []
        let dispatchGroup = DispatchGroup()
        var isDone = false

        // Completion handlers may run on arbitrary queues. All bookkeeping
        // (result array, progress display, group accounting) is funnelled
        // through the main queue so that the unsynchronised state is only
        // ever touched from one thread.
        func complete(_ asset: PHAsset, _ localURL: URL?) {
            DispatchQueue.main.async {
                if let localURL = localURL {
                    mediaItems.append(MediaItem(asset: asset, url: localURL))
                }
                progress.incr()
                dispatchGroup.leave()
            }
        }

        // Extra enter/leave pair around the loop: prevents the group from
        // becoming empty (and notifying) before every request is scheduled.
        dispatchGroup.enter()
        dispatchGroup.notify(queue: .main) {
            isDone = true
        }

        for i in 0 ..< assetCount {
            let asset = assets[i]
            switch asset.mediaType {
            case .image:
                dispatchGroup.enter()
                asset.requestContentEditingInput(with: editOpts) { input, _ in
                    var localURL: URL? = nil
                    if let url = input?.fullSizeImageURL, url.isFileURL {
                        localURL = url
                    }
                    complete(asset, localURL)
                }
            case .video:
                dispatchGroup.enter()
                PHImageManager.default().requestAVAsset(forVideo: asset, options: videoOpts) { avAsset, _, _ in
                    var localURL: URL? = nil
                    if let urlAsset = avAsset as? AVURLAsset {
                        if urlAsset.url.isFileURL {
                            localURL = urlAsset.url
                        } else {
                            print("not local")
                        }
                    }
                    complete(asset, localURL)
                }
            default:
                // Other media types are skipped but still count towards progress.
                progress.incr()
            }
        }
        dispatchGroup.leave()

        // Service the main queue until the group's notify block has run.
        while !isDone {
            mainLoop.run(mode: .default, before: .distantFuture)
        }
        progress.done()
        return mediaItems
    }
}
// Collect every locally available media item, then bucket the items by file
// size: only files of identical size can possibly be exact duplicates.
let items = MediaItem.getLocalItems()
let progress = ProgressInfo("Checking sizes…", totalCount: items.count)
var itemsBySize: [Int: [MediaItem]] = [:]
for candidate in items {
    // Read the size first, then file the item under it and advance the display.
    let byteCount = candidate.size
    progress.incr()
    itemsBySize[byteCount, default: []].append(candidate)
}
progress.done()
// Assets to keep (one per set of identical files) and the assets that
// duplicate them.
var keptAssets: [PHAsset] = []
var duplicateAssets: [PHAsset] = []

// Within each bucket of same-sized files, group by MD5 and treat every file
// after the first of a group as a duplicate of that first file.
for sameSizeItems in itemsBySize.values where sameSizeItems.count > 1 {
    print("Comparing \(sameSizeItems.count) items with the same size…", terminator: "")
    fflush(stdout)

    var itemsByChecksum: [Insecure.MD5Digest: [MediaItem]] = [:]
    for candidate in sameSizeItems {
        itemsByChecksum[candidate.checksum, default: []].append(candidate)
    }

    var duplicateCount = 0
    for identicalItems in itemsByChecksum.values where identicalItems.count > 1 {
        let kept = identicalItems[0]
        keptAssets.append(kept.asset)

        for duplicate in identicalItems.dropFirst() {
            // Two assets backed by the very same file must never be treated
            // as duplicates of each other.
            guard duplicate.url != kept.url else {
                fatalError("Duplicate URL \(kept.url) encountered!")
            }
            // Confirm the checksum match with a full content comparison
            // before marking anything for deletion.
            guard duplicate.sameAs(kept) else {
                fatalError("\(kept.url) has the same MD5 as \(duplicate.url) but different contents!")
            }
            duplicateAssets.append(duplicate.asset)
            duplicateCount += 1
        }
    }

    // Finish the "Comparing…" line, with the result when there is one.
    print(duplicateCount > 0 ? " Found \(duplicateCount) duplicate(s)." : "")
}
// When duplicates were found: expose them in two review albums, then delete
// the duplicates only after explicit confirmation on standard input.
if !duplicateAssets.isEmpty {
    // Library changes can fail at runtime (for example when access to the
    // library is refused), so report the error instead of crashing.
    do {
        try PHPhotoLibrary.shared().performChangesAndWait {
            let keepCollection = PHAssetCollectionChangeRequest.creationRequestForAssetCollection(withTitle: "Duplicates to keep")
            let deleteCollection = PHAssetCollectionChangeRequest.creationRequestForAssetCollection(withTitle: "Duplicates to delete")
            keepCollection.addAssets(keptAssets as NSFastEnumeration)
            deleteCollection.addAssets(duplicateAssets as NSFastEnumeration)
        }
    } catch {
        fputs("Unable to create the review albums: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    print("Two albums “Duplicates to keep” and “Duplicates to delete” have been created.")
    print("Please check that all photos in “Duplicates to delete” are present in “Duplicates to keep” before continuing.")

    // Ask until a recognisable answer is given. An empty line or end of
    // input selects the default, which is "no".
    var doDelete = false
    prompt: while true {
        print("Delete photos in “Duplicates to delete”? [y/N] ", terminator: "")
        let answer = (readLine() ?? "").trimmingCharacters(in: .whitespaces).lowercased()
        switch answer {
        case "y", "yes":
            doDelete = true
            break prompt
        case "n", "no", "":
            doDelete = false
            break prompt
        default:
            continue
        }
    }

    if doDelete {
        print("Deleting… ", terminator: "")
        fflush(stdout)
        // The deletion may be refused or fail; report that rather than crash.
        do {
            try PHPhotoLibrary.shared().performChangesAndWait {
                PHAssetChangeRequest.deleteAssets(duplicateAssets as NSFastEnumeration)
            }
        } catch {
            print("failed.")
            fputs("Unable to delete the duplicates: \(error.localizedDescription)\n", stderr)
            exit(1)
        }
        print("Done!")
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment