Skip to content

Instantly share code, notes, and snippets.

@awilliams
Last active October 22, 2021 20:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awilliams/b025cdae00e89e7c57f8c1d3e2fa29a6 to your computer and use it in GitHub Desktop.
Save awilliams/b025cdae00e89e7c57f8c1d3e2fa29a6 to your computer and use it in GitHub Desktop.
Duplicate image detector
package main
// Duplicate image detector.
//
// Usage:
// ./dupimgs -dir .
//
// This will recursively search the given directory for files with
// {.jpg, .jpeg} extensions (currently hardcoded). For each file with
// such an extension, the MD5 hash of the file's contents will be calculated.
// After traversing all sub-directories and matching files, the paths of files
// with identical hashes will be printed to STDOUT.
import (
"crypto/md5"
"flag"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
)
// args holds the command-line options. Each field is initialised with its
// default value, which is also what the corresponding flag advertises.
var args = struct{ dir string }{dir: "."}
// main walks the directory given by -dir, hashes every file whose extension
// is .jpg or .jpeg (compared case-insensitively), and prints each group of
// paths that share an MD5 digest to STDOUT. It then lists the extensions
// that were skipped and a final duplicate count.
//
// The first error reported by the walk or by hashing a file stops the scan
// and is passed to bail.
func main() {
	flag.StringVar(&args.dir, "dir", args.dir, "Directory to scan")
	flag.Parse()

	// hashes maps a hex-encoded MD5 digest to every path with that digest.
	hashes := make(map[string][]string, 1024)
	// ignoredExts counts skipped files, keyed by lower-cased extension.
	ignoredExts := make(map[string]int)
	// inspected counts the files actually hashed. len(hashes) is not a
	// substitute: it counts distinct digests, so it under-reports as soon as
	// any duplicates exist.
	var inspected int

	err := filepath.WalkDir(args.dir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}

		ext := strings.ToLower(filepath.Ext(d.Name()))
		switch ext {
		case ".jpg", ".jpeg":
			// OK
		default:
			ignoredExts[ext]++
			return nil
		}

		sum, err := fileMD5(path)
		if err != nil {
			return err
		}
		hashes[sum] = append(hashes[sum], path)
		inspected++
		return nil
	})
	if err != nil {
		bail(err)
	}

	fmt.Printf("Inspected %d images\n\n", inspected)

	// Map iteration order is unspecified, so groups appear in no particular
	// order.
	var dups int
	for hash, paths := range hashes {
		if len(paths) < 2 {
			continue
		}
		dups++
		fmt.Printf("%d photos with hash %q:\n", len(paths), hash)
		for _, p := range paths {
			fmt.Printf(" - %s\n", p)
		}
		fmt.Println("===")
	}

	if len(ignoredExts) > 0 {
		fmt.Println("The following file extensions were ignored:")
		for ext, count := range ignoredExts {
			fmt.Printf("- %-15s %d\n", ext, count)
		}
	}

	if dups > 0 {
		fmt.Printf("%d sets of duplicates found\n", dups)
	} else {
		fmt.Println("No duplicates found")
	}
}

// fileMD5 returns the hex-encoded MD5 digest of the contents of the file at
// path. The contents are streamed into the hash with io.Copy rather than
// read into memory in one piece.
func fileMD5(path string) (string, error) {
	fd, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer fd.Close()

	h := md5.New()
	if _, err := io.Copy(h, fd); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", h.Sum(nil)), nil
}
// bail writes err's message to standard error, prefixed with "Error: " and
// terminated by a newline, then exits the process with status 1. It does not
// return.
func bail(err error) {
	msg := fmt.Sprintf("Error: %s\n", err.Error())
	// As with the formatted write it replaces, the result of the write is
	// not inspected: the process is about to exit regardless.
	os.Stderr.WriteString(msg)
	os.Exit(1)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment