Skip to content

Instantly share code, notes, and snippets.

@khellang
Last active August 29, 2015 14:28
Show Gist options
  • Save khellang/3a1a49744f6825b6f401 to your computer and use it in GitHub Desktop.
Save khellang/3a1a49744f6825b6f401 to your computer and use it in GitHub Desktop.
open System
open System.IO
open System.Security.Cryptography
/// Command-line options for the duplicate finder.
type Arguments =
    {
        /// Minimum size, in bytes, a file must have to be examined.
        ByteCount: int64
        /// Directory whose files are searched for duplicates.
        Path: string
    }
/// Partial active pattern that matches a string which parses as a 64-bit
/// signed integer and yields the parsed value; any other string does not match.
///
/// `str` is annotated as `string` so that `Int64.TryParse` resolves to a single
/// overload: on current .NET there is also a `ReadOnlySpan<char>` overload, and
/// without the annotation overload resolution can fail.
let (|Long|_|) (str: string) =
    match Int64.TryParse(str) with
    | true, value -> Some value
    | _ -> None
/// Lazily enumerates the paths of all files beneath `path`: first the files
/// directly inside `path`, then, for each subdirectory in turn, the files
/// found by recursing into it.
let rec getFiles (path: string) : seq<string> =
    Seq.delay (fun () ->
        let filesHere = Directory.EnumerateFiles(path)
        // Delayed so the subdirectories are only listed once the files
        // directly in `path` have been consumed.
        let filesBelow =
            Seq.delay (fun () ->
                Directory.EnumerateDirectories(path)
                |> Seq.collect getFiles)
        Seq.append filesHere filesBelow)
/// Reads the whole file and returns the MD5 digest of its contents as a
/// lowercase hexadecimal string (two characters per digest byte).
let getHash (file: FileInfo) =
    use md5 = MD5.Create()
    use stream = file.OpenRead()
    let digest = md5.ComputeHash(stream)
    // Render each byte as two lowercase hex digits and glue them together.
    let hexPairs = seq { for b in digest -> b.ToString("x2") }
    String.Join(String.Empty, hexPairs)
/// Finds groups of files under `root` (searched recursively via `getFiles`)
/// whose contents hash to the same value, looking only at files that are at
/// least `minimumSize` bytes long.
///
/// Returns `(hash, paths)` pairs sorted by hash; every group holds at least
/// two paths, listed in the order the files were enumerated.
let private findDuplicateGroups (minimumSize: int64) (root: string) =
    getFiles root
    |> Seq.map (fun path -> FileInfo(path))
    |> Seq.filter (fun file -> file.Length >= minimumSize)
    // Files of different sizes cannot have identical contents, so bucket by
    // size first and hash only buckets with more than one file. This avoids
    // reading every file in full just to discover it is unique.
    |> Seq.groupBy (fun file -> file.Length)
    |> Seq.map (fun (_, sameSize) -> List.ofSeq sameSize)
    |> Seq.filter (fun sameSize -> List.length sameSize > 1)
    |> Seq.collect (fun sameSize ->
        sameSize |> List.map (fun file -> getHash file, file.FullName))
    |> Seq.groupBy fst
    // Materialise each group so it is not re-enumerated when counted and printed.
    |> Seq.map (fun (hash, entries) -> hash, entries |> Seq.map snd |> List.ofSeq)
    |> Seq.filter (fun (_, paths) -> List.length paths > 1)
    |> Seq.sortBy fst
    |> List.ofSeq

/// Program entry point. Expects exactly two arguments: `<byteCount> <path>`.
///
/// Returns the process exit code:
///   0 - the search completed,
///   1 - the arguments were missing or `<byteCount>` was not an integer,
///   2 - `<path>` does not name an existing directory.
[<EntryPoint>]
let main argv =
    match argv with
    | [| Long byteCount; path |] ->
        let args = { ByteCount = byteCount; Path = path }
        let directory = DirectoryInfo(args.Path)
        if not directory.Exists then
            eprintfn "Invalid Argument: Directory '%s' does not exist!" args.Path
            2
        else
            printfn "Searching for duplicates in '%s'...%s" directory.FullName Environment.NewLine
            for (hash, paths) in findDuplicateGroups args.ByteCount args.Path do
                printfn "Duplicates with hash '%s' found:" hash
                for duplicate in paths do
                    printfn " - %s" duplicate
                printfn ""
            0
    | _ ->
        eprintfn "Invalid Arguments - Usage: DuplicateFileFinder <byteCount> <path>"
        1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment