- Handmade (takes way more time)
# Based on
# https://unix.stackexchange.com/questions/277697/whats-the-quickest-way-to-find-duplicated-files
# find all non-empty files and prepend each match with its MD5 hash
# (which may take time, but guarantees the content is the same even when the names differ)
find . ! -empty -type f -exec md5sum {} + | \
# sort using 48 cores
sort --parallel=48 | \
# compare only the first 32 characters (the MD5, our duplicate id) and group the matches (-dD also works instead of --group)
uniq -w32 --group
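The same idea also fits in a short Go program: hash every non-empty file and group paths by digest. A minimal sketch, assuming "." as the root directory (the program and its names are illustrative, not part of the original pipeline):
package main

import (
	"crypto/md5"
	"fmt"
	"io"
	"io/fs"
	"log"
	"os"
	"path/filepath"
)

func main() {
	groups := map[string][]string{} // MD5 digest -> file paths
	err := filepath.WalkDir(".", func(path string, d fs.DirEntry, err error) error {
		if err != nil || d.IsDir() {
			return err
		}
		info, err := d.Info()
		if err != nil || info.Size() == 0 {
			return err // skip empty files, like find's ! -empty
		}
		f, err := os.Open(path)
		if err != nil {
			return err
		}
		defer f.Close()
		h := md5.New()
		if _, err := io.Copy(h, f); err != nil {
			return err
		}
		sum := fmt.Sprintf("%x", h.Sum(nil))
		groups[sum] = append(groups[sum], path)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	for sum, paths := range groups {
		if len(paths) > 1 { // only report actual duplicates
			fmt.Println(sum, paths)
		}
	}
}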
- App to do this
fdupes -r -1 folder > assuncao_repeated.txt
- Faster than fdupes
jdupes -r -M folder > assuncao_repeated.txt
You could delete the duplicates directly; in my case I preferred to move them to a separate folder instead, using the Go script below to find and move them.
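The script reads repeated.txt (rename assuncao_repeated.txt from the commands above accordingly) and assumes one path per line with a blank line between duplicate groups, keeping the first path of each group. Note that fdupes' -1 flag puts a whole group on one line, so the default one-path-per-line output fits this script better. A hypothetical example of the expected input (paths are made up):
assuncao/photos/trip.jpg
assuncao/photos/trip-copy.jpg

assuncao/docs/report.pdf
assuncao/old/report.pdf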
package main

import (
	"bufio"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
)

const (
	folderBase  = "assuncao"
	folderMoved = "moved"
)

func main() {
	log.Println("starting")

	// Paths in repeated.txt are relative to the parent directory.
	base, err := filepath.Abs("..")
	if err != nil {
		log.Fatal(err)
	}

	f, err := os.Open("repeated.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		if err := f.Close(); err != nil {
			panic(err)
		}
	}()

	reader := bufio.NewReader(f)
	// Groups of duplicates are separated by blank lines; the first path
	// of each group is the copy we keep, so it gets skipped.
	skipNext := true
	for {
		line, err := reader.ReadString('\n')
		if err != nil {
			if err == io.EOF {
				break
			}
			Log(err, "failed to read line")
			break
		}
		if skipNext {
			skipNext = false
			continue
		}
		if line == "\n" {
			// Blank line: a new group starts next, keep its first path.
			skipNext = true
		}
		tr := strings.Trim(line, "\n")
		if len(tr) == 0 {
			continue
		}
		if !strings.Contains(tr, folderBase) {
			continue
		}
		p := filepath.Join(base, tr)
		n := filepath.Join(base, folderMoved, tr)
		// Recreate the original directory layout under the moved folder.
		// os.ModeDir carries no permission bits, so use 0o755 here.
		if err = os.MkdirAll(filepath.Dir(n), 0o755); err != nil {
			Log(err, "failed to create directory")
			continue
		}
		if err = os.Rename(p, n); err != nil {
			Log(err, "failed to move file")
		}
	}
	log.Println("finished")
}

// Log prints a tagged error without stopping the run.
func Log(err error, reason string) {
	log.Printf("[error] %s: %s", reason, err)
}
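Since base resolves to "..", the script is meant to run (e.g. with go run, whatever you name the file) from a subfolder sitting next to the assuncao directory. Moved duplicates keep their original directory layout under moved/; the folder names come from the constants above, so adjust them for your own tree.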