Created
January 2, 2014 03:40
-
-
Save mohiji/8214733 to your computer and use it in GitHub Desktop.
Got distracted while sorting photos and wrote a thing to help me find duplicate files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  main.m
//  dupe-detector
//
//  Created by Jonathan Fischer on 1/1/14.
//  Copyright (c) 2014 Jonathan Fischer. All rights reserved.
//
#import <Foundation/Foundation.h> | |
#import <CommonCrypto/CommonDigest.h> | |
#import "FMDB/FMDatabase.h" | |
// Writes the one-line command synopsis to standard output.
static void printUsage()
{
    const char *synopsis = "Usage: dupe-detector -db <database> -path <path>\n";
    fputs(synopsis, stdout);
}
// Creates the `files` and `hashes` tables (and their lookup indexes) if they
// do not exist yet.
//
// db: an open FMDatabase.
//
// The text columns are declared TEXT rather than STRING: SQLite assigns
// NUMERIC affinity to a column declared STRING, so a value that looks like a
// number (an all-digit hash, a path such as "0123") would be stored as a
// number and lose its leading zeros. Because of IF NOT EXISTS, a database
// created by an earlier run keeps whatever schema it already has.
//
// The indexes cover the per-file lookups (files.path, hashes.hash) and the
// final count query (files.hash_id), which otherwise scan the whole table.
//
// Failures are logged; the function itself returns nothing.
static void createTables(FMDatabase *db)
{
    NSArray *schemaStatements = @[
        @"CREATE TABLE IF NOT EXISTS files (id INTEGER PRIMARY KEY AUTOINCREMENT, path TEXT, hash_id INTEGER)",
        @"CREATE TABLE IF NOT EXISTS hashes (id INTEGER PRIMARY KEY AUTOINCREMENT, hash TEXT, count INTEGER)",
        @"CREATE INDEX IF NOT EXISTS files_path_idx ON files (path)",
        @"CREATE INDEX IF NOT EXISTS files_hash_id_idx ON files (hash_id)",
        @"CREATE INDEX IF NOT EXISTS hashes_hash_idx ON hashes (hash)"
    ];

    for (NSString *statement in schemaStatements) {
        if (![db executeUpdate:statement]) {
            NSLog(@"Schema statement failed (%@): %@", statement, [db lastErrorMessage]);
        }
    }
}
// Returns the row id of `hash` in the `hashes` table, inserting a new row
// (with count 0) when the hash has not been seen before.
//
// db:   an open FMDatabase.
// hash: the hash string to look up or insert.
//
// Returns the id of the existing or newly inserted row, or 0 if the INSERT
// fails (the table uses AUTOINCREMENT ids, so 0 is never a real row id).
static NSUInteger insertHash(FMDatabase *db, NSString *hash)
{
    FMResultSet *results = [db executeQuery:@"SELECT id FROM hashes WHERE hash = ?", hash];
    if ([results next]) {
        NSUInteger existingId = (NSUInteger)[results longLongIntForColumn:@"id"];
        // Release the underlying statement now instead of whenever the
        // result set object happens to be deallocated.
        [results close];
        return existingId;
    }
    [results close];

    // New hash
    if (![db executeUpdate:@"INSERT INTO hashes VALUES (NULL, ?, 0)", hash]) {
        // Do not fall through to lastInsertRowId: after a failed INSERT it
        // would still report the id of some earlier, unrelated row.
        NSLog(@"Couldn't insert hash %@: %@", hash, [db lastErrorMessage]);
        return 0;
    }
    return (NSUInteger)[db lastInsertRowId];
}
// Records that the file at `filePath` has the hash row `hashId`.
//
// db:       an open FMDatabase.
// filePath: path of the file; used as the lookup key in `files`.
// hashId:   id of the matching row in `hashes`.
//
// If a row for the path already exists its hash_id is updated; otherwise a
// new row is inserted. Failures are logged.
static void insertFile(FMDatabase *db, NSString *filePath, NSUInteger hashId)
{
    NSNumber *realHashId = @(hashId);

    FMResultSet *results = [db executeQuery:@"SELECT id FROM files WHERE path = ?", filePath];
    NSNumber *existingFileId = nil;
    if ([results next]) {
        existingFileId = @([results longLongIntForColumn:@"id"]);
    }
    // Close before writing so no read statement on `files` is left pending.
    [results close];

    BOOL succeeded;
    if (existingFileId != nil) {
        succeeded = [db executeUpdate:@"UPDATE files SET hash_id = ? WHERE id = ?", realHashId, existingFileId];
    } else {
        succeeded = [db executeUpdate:@"INSERT INTO files VALUES (NULL, ?, ?)", filePath, realHashId];
    }

    if (!succeeded) {
        NSLog(@"Couldn't record file %@: %@", filePath, [db lastErrorMessage]);
    }
}
// Recomputes hashes.count for every row as the number of rows in `files`
// whose hash_id refers to that hash.
static void updateHashes(FMDatabase *db)
{
    NSString *recountStatement =
        @"UPDATE hashes SET count = (SELECT COUNT(hash_id) FROM files WHERE files.hash_id = hashes.id)";
    [db executeUpdate:recountStatement];
}
// Computes the MD5 digest of a file's contents.
//
// fileURL: file URL of the file to hash.
//
// Returns the digest as 32 lowercase hexadecimal characters, or nil when the
// contents cannot be read (for example a missing file or a directory).
//
// The file is streamed through the digest in fixed-size chunks. Loading it
// whole and making a single CC_MD5 call would hold the entire file in memory
// and would truncate the length to CC_LONG (32 bits) for very large files.
// A read failure is reported as nil rather than hashed as empty input, so
// unreadable entries are not mistaken for duplicates of empty files.
static NSString *HashStringForFile(NSURL *fileURL)
{
    NSInputStream *stream = [NSInputStream inputStreamWithURL:fileURL];
    if (stream == nil) {
        return nil;
    }
    [stream open];

    CC_MD5_CTX context;
    CC_MD5_Init(&context);

    uint8_t chunk[32 * 1024];
    NSInteger bytesRead;
    while ((bytesRead = [stream read:chunk maxLength:sizeof(chunk)]) > 0) {
        CC_MD5_Update(&context, chunk, (CC_LONG)bytesRead);
    }
    [stream close];

    // read:maxLength: returns 0 at end of stream and a negative value on error.
    if (bytesRead < 0) {
        return nil;
    }

    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5_Final(digest, &context);

    NSMutableString *hashStr = [NSMutableString stringWithCapacity:CC_MD5_DIGEST_LENGTH * 2];
    for (int i = 0; i < CC_MD5_DIGEST_LENGTH; i++) {
        [hashStr appendFormat:@"%02x", digest[i]];
    }
    return hashStr;
}
// Entry point: walks the directory given by -path, hashes every regular file
// and records path/hash pairs in the SQLite database given by -db, then
// refreshes the per-hash file counts.
//
// The -path and -db values are read through NSUserDefaults, so argc/argv are
// not inspected directly.
//
// Returns 0 on success or after printing usage, -1 when the database cannot
// be opened or the directory cannot be enumerated.
int main(int argc, const char * argv[])
{
    @autoreleasepool {
        NSUserDefaults *defaults = [NSUserDefaults standardUserDefaults];
        NSString *basePath = [defaults stringForKey:@"path"];
        NSString *databasePath = [defaults stringForKey:@"db"];
        if (basePath == nil || databasePath == nil) {
            printUsage();
            return 0;
        }

        // Check the database object and the result of -open; testing
        // databasePath again here could never fail.
        FMDatabase *database = [FMDatabase databaseWithPath:databasePath];
        if (database == nil || ![database open]) {
            NSLog(@"Couldn't open database at path %@", databasePath);
            return -1;
        }
        createTables(database);

        NSLog(@"Base path is %@", basePath);

        // A filesystem path must become a file URL. URLWithString: returns
        // nil for paths containing spaces and yields a scheme-less URL
        // otherwise.
        NSURL *baseUrl = [NSURL fileURLWithPath:basePath];
        NSFileManager *fileManager = [NSFileManager defaultManager];
        NSDirectoryEnumerator *enumerator = [fileManager enumeratorAtURL:baseUrl
                                              includingPropertiesForKeys:@[NSURLPathKey, NSURLIsDirectoryKey]
                                                                 options:NSDirectoryEnumerationSkipsHiddenFiles
                                                            errorHandler:NULL];
        if (enumerator == nil) {
            NSLog(@"Unable to get a directory enumerator for path %@", basePath);
            [database close];
            return -1;
        }

        for (NSURL *fileURL in enumerator) {
            // Drain temporaries per file so memory stays flat on large trees.
            @autoreleasepool {
                // Directories have no contents to hash; the enumerator
                // descends into them on its own.
                NSNumber *isDirectory = nil;
                [fileURL getResourceValue:&isDirectory forKey:NSURLIsDirectoryKey error:nil];
                if ([isDirectory boolValue]) {
                    continue;
                }

                NSString *filePath = nil;
                if (![fileURL getResourceValue:&filePath forKey:NSURLPathKey error:nil] || filePath == nil) {
                    filePath = [fileURL path];
                }

                NSString *hashString = HashStringForFile(fileURL);
                if (hashString == nil) {
                    // Defensive: without a hash there is nothing meaningful to record.
                    NSLog(@"Skipping %@: no hash could be computed", filePath);
                    continue;
                }

                NSUInteger hashId = insertHash(database, hashString);
                insertFile(database, filePath, hashId);
            }
        }

        updateHashes(database);
        [database close];
    }
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment