Skip to content

Instantly share code, notes, and snippets.

@mohiji
Created January 2, 2014 03:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mohiji/8214733 to your computer and use it in GitHub Desktop.
Save mohiji/8214733 to your computer and use it in GitHub Desktop.
Got distracted while sorting photos and wrote a thing to help me find duplicate files.
//
// main.m
// dupe-detector
//
// Created by Jonathan Fischer on 1/1/14.
// Copyright (c) 2014 Jonathan Fischer. All rights reserved.
//
#import <Foundation/Foundation.h>
#import <CommonCrypto/CommonDigest.h>
#import "FMDB/FMDatabase.h"
/// Writes the one-line command synopsis to standard output.
static void printUsage()
{
    fputs("Usage: dupe-detector -db <database> -path <path>\n", stdout);
}
/// Creates the `files` and `hashes` tables if they do not exist yet.
///
/// `files` maps a path to the id of its content hash; `hashes` stores each
/// distinct hash string together with a `count` column that `updateHashes`
/// fills in afterwards.
///
/// @param db An open database connection.
static void createTables(FMDatabase *db)
{
    // The text columns are declared TEXT, not STRING: SQLite assigns NUMERIC
    // affinity to a column declared "STRING", so a value made up only of
    // digits (possible for a hex digest) would be coerced to a number and no
    // longer compare equal to the string that was inserted.
    // Tables that already exist are left untouched by IF NOT EXISTS.
    NSArray *statements = @[
        @"CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, hash_id INTEGER)",
        @"CREATE TABLE IF NOT EXISTS hashes (id INTEGER PRIMARY KEY AUTOINCREMENT, hash TEXT, count INTEGER)",
    ];

    for (NSString *sql in statements) {
        if (![db executeUpdate:sql]) {
            NSLog(@"Couldn't create table: %@", [db lastErrorMessage]);
        }
    }
}
/// Returns the row id of `hash` in the `hashes` table, inserting a new row
/// (with a count of 0) when the hash has not been seen before.
///
/// @param db   An open database connection.
/// @param hash The hash string to look up or insert.
/// @return The id of the existing or newly inserted row, or 0 if the insert
///         failed (AUTOINCREMENT row ids start at 1, so 0 never names a row).
static NSUInteger insertHash(FMDatabase *db, NSString *hash)
{
    FMResultSet *results = [db executeQuery:@"SELECT id FROM hashes WHERE hash = ?", hash];
    if ([results next]) {
        NSUInteger existingId = (NSUInteger)[results longForColumn:@"id"];
        // The result set was not read to the end, so close it explicitly to
        // release the underlying statement before returning.
        [results close];
        return existingId;
    }
    [results close];

    // New hash
    if (![db executeUpdate:@"INSERT INTO hashes VALUES (NULL, ?, 0)", hash]) {
        // Without this check a failed insert would hand back the row id of
        // some earlier, unrelated insert.
        NSLog(@"Couldn't insert hash %@: %@", hash, [db lastErrorMessage]);
        return 0;
    }
    return (NSUInteger)db.lastInsertRowId;
}
/// Records that the file at `filePath` has the hash identified by `hashId`.
///
/// If a row for the path already exists its `hash_id` is overwritten;
/// otherwise a new row is inserted.
///
/// @param db       An open database connection.
/// @param filePath Path of the file, used as the lookup key.
/// @param hashId   Row id in the `hashes` table (as returned by `insertHash`).
static void insertFile(FMDatabase *db, NSString *filePath, NSUInteger hashId)
{
    NSNumber *realHashId = @((NSInteger)hashId);

    // Read what we need and close the result set before writing, so no
    // SELECT statement is left open on `files` while the table is modified.
    FMResultSet *results = [db executeQuery:@"SELECT id FROM files WHERE path = ?", filePath];
    NSNumber *fileId = nil;
    if ([results next]) {
        fileId = @([results longForColumn:@"id"]);
    }
    [results close];

    BOOL succeeded;
    if (fileId != nil) {
        succeeded = [db executeUpdate:@"UPDATE files SET hash_id = ? WHERE id = ?", realHashId, fileId];
    } else {
        succeeded = [db executeUpdate:@"INSERT INTO files VALUES (NULL, ?, ?)", filePath, realHashId];
    }

    if (!succeeded) {
        NSLog(@"Couldn't record file %@: %@", filePath, [db lastErrorMessage]);
    }
}
/// Refreshes `hashes.count` so that each hash row holds the number of rows
/// in `files` whose `hash_id` points at it.
///
/// @param db An open database connection.
static void updateHashes(FMDatabase *db)
{
    NSString *recountSQL =
        @"UPDATE hashes SET count = (SELECT COUNT(hash_id) FROM files WHERE files.hash_id = hashes.id)";
    [db executeUpdate:recountSQL];
}
/// Computes the MD5 digest of a file's contents as a lowercase hex string.
///
/// The file is read in fixed-size chunks, so memory use does not grow with
/// the file size and files of 4 GiB or more are hashed correctly (a single
/// CC_MD5 call takes a 32-bit length).
///
/// MD5 is kept because previously stored hashes were produced with it; it is
/// used here only to group identical contents, not for security.
///
/// @param fileURL URL of the file to hash.
/// @return A 32-character hex digest, or nil if the file could not be read
///         (for example it does not exist, is a directory, or is not
///         readable). Returning nil keeps unreadable entries from all being
///         reported with the digest of empty input.
static NSString *HashStringForFile(NSURL *fileURL)
{
    if (fileURL == nil) {
        return nil;
    }

    NSInputStream *stream = [NSInputStream inputStreamWithURL:fileURL];
    if (stream == nil) {
        return nil;
    }
    [stream open];

    CC_MD5_CTX context;
    CC_MD5_Init(&context);

    uint8_t buffer[64 * 1024];
    NSInteger bytesRead;
    while ((bytesRead = [stream read:buffer maxLength:sizeof(buffer)]) > 0) {
        // bytesRead is bounded by sizeof(buffer), so the narrowing cast is safe.
        CC_MD5_Update(&context, buffer, (CC_LONG)bytesRead);
    }
    [stream close];

    // A negative count means the read failed; 0 means end of stream.
    if (bytesRead < 0) {
        return nil;
    }

    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5_Final(digest, &context);

    NSMutableString *hashStr = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
    for (int i = 0; i < CC_MD5_DIGEST_LENGTH; i++) {
        [hashStr appendFormat:@"%02x", digest[i]];
    }
    return hashStr;
}
/// Entry point: hashes every regular file under `-path` and records the
/// results in the SQLite database given by `-db`.
///
/// Both options are read through NSUserDefaults, which exposes
/// `-name value` command-line pairs as defaults.
///
/// @return 0 on success or when usage is printed; -1 if the database cannot
///         be opened or the directory cannot be enumerated.
int main(int argc, const char * argv[])
{
    @autoreleasepool {
        NSUserDefaults *defaults = [NSUserDefaults standardUserDefaults];
        NSString *basePath = [defaults stringForKey:@"path"];
        NSString *databasePath = [defaults stringForKey:@"db"];
        if (basePath == nil || databasePath == nil) {
            printUsage();
            return 0;
        }

        // Check the database object and the result of -open; testing
        // databasePath here would never fire because it was validated above.
        FMDatabase *database = [FMDatabase databaseWithPath:databasePath];
        if (database == nil || ![database open]) {
            NSLog(@"Couldn't open database at path %@", databasePath);
            return -1;
        }
        createTables(database);

        NSLog(@"Base path is %@", basePath);

        // basePath is a filesystem path, not a URL string: URLWithString:
        // returns nil for paths containing spaces and produces a URL without
        // the file scheme otherwise.
        NSURL *baseUrl = [NSURL fileURLWithPath:basePath isDirectory:YES];
        NSFileManager *fileManager = [NSFileManager defaultManager];
        NSDirectoryEnumerator *enumerator = [fileManager enumeratorAtURL:baseUrl
                                              includingPropertiesForKeys:@[NSURLIsDirectoryKey]
                                                                 options:NSDirectoryEnumerationSkipsHiddenFiles
                                                            errorHandler:nil];
        if (enumerator == nil) {
            NSLog(@"Unable to get a directory enumerator for path %@", basePath);
            [database close];
            return -1;
        }

        for (NSURL *fileURL in enumerator) {
            // Drain per-file temporaries so memory stays flat on large trees.
            @autoreleasepool {
                // Directories have no content to hash; the enumerator visits
                // their children on its own.
                NSNumber *isDirectory = nil;
                [fileURL getResourceValue:&isDirectory forKey:NSURLIsDirectoryKey error:nil];
                if (isDirectory.boolValue) {
                    continue;
                }

                NSString *filePath = fileURL.path;
                NSString *hashString = HashStringForFile(fileURL);
                if (filePath == nil || hashString == nil) {
                    NSLog(@"Skipping unreadable file %@", fileURL);
                    continue;
                }

                NSUInteger hashId = insertHash(database, hashString);
                insertFile(database, filePath, hashId);
            }
        }

        updateHashes(database);
        [database close];
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment