- Handmade (takes way more time)
# Based on
# https://unix.stackexchange.com/questions/277697/whats-the-quickest-way-to-find-duplicated-files
# find all non-empty files and prepend each match with its MD5 hash
# (which may take time, but guarantees the content is the same even when the names differ)
find . ! -empty -type f -exec md5sum {} + | \
# sort using 48 cores
sort --parallel=48 | \
# compare only the first 32 characters (the MD5, our duplicate id) and group the matches (-dD also works instead of --group)
uniq -w32 --group
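The same idea also fits in a short Go program: hash every non-empty file and group paths by digest. A minimal sketch, assuming "." as the root directory (the program and its names are illustrative, not part of the original pipeline):
package main

import (
	"crypto/md5"
	"fmt"
	"io"
	"io/fs"
	"log"
	"os"
	"path/filepath"
)

func main() {
	groups := map[string][]string{} // MD5 digest -> file paths
	err := filepath.WalkDir(".", func(path string, d fs.DirEntry, err error) error {
		if err != nil || d.IsDir() {
			return err
		}
		info, err := d.Info()
		if err != nil || info.Size() == 0 {
			return err // skip empty files, like find's ! -empty
		}
		f, err := os.Open(path)
		if err != nil {
			return err
		}
		defer f.Close()
		h := md5.New()
		if _, err := io.Copy(h, f); err != nil {
			return err
		}
		sum := fmt.Sprintf("%x", h.Sum(nil))
		groups[sum] = append(groups[sum], path)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	for sum, paths := range groups {
		if len(paths) > 1 { // only report actual duplicates
			fmt.Println(sum, paths)
		}
	}
}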
- App to do this
fdupes -r -1 folder > assuncao_repeated.txt
- Faster than fdupes
jdupes -r -M folder > assuncao_repeated.txt
You could delete the duplicates directly; in my case I preferred to move them to a separate folder instead, using the Go script below to find and move them.
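The script reads repeated.txt (rename assuncao_repeated.txt from the commands above accordingly) and assumes one path per line with a blank line between duplicate groups, keeping the first path of each group. Note that fdupes' -1 flag puts a whole group on one line, so the default one-path-per-line output fits this script better. A hypothetical example of the expected input (paths are made up):
assuncao/photos/trip.jpg
assuncao/photos/trip-copy.jpg

assuncao/docs/report.pdf
assuncao/old/report.pdf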
package main

import (
	"bufio"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
)

const (
	folderBase  = "assuncao"
	folderMoved = "moved"
)

func main() {
	log.Println("starting")

	// Paths in repeated.txt are relative to the parent directory.
	base, err := filepath.Abs("..")
	if err != nil {
		log.Fatal(err)
	}

	f, err := os.Open("repeated.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		if err := f.Close(); err != nil {
			panic(err)
		}
	}()

	reader := bufio.NewReader(f)
	// Groups of duplicates are separated by blank lines; the first path
	// of each group is the copy we keep, so it gets skipped.
	skipNext := true
	for {
		line, err := reader.ReadString('\n')
		if err != nil {
			if err == io.EOF {
				break
			}
			Log(err, "failed to read line")
			break
		}
		if skipNext {
			skipNext = false
			continue
		}
		if line == "\n" {
			// Blank line: a new group starts next, keep its first path.
			skipNext = true
		}
		tr := strings.Trim(line, "\n")
		if len(tr) == 0 {
			continue
		}
		if !strings.Contains(tr, folderBase) {
			continue
		}
		p := filepath.Join(base, tr)
		n := filepath.Join(base, folderMoved, tr)
		// Recreate the original directory layout under the moved folder.
		// os.ModeDir carries no permission bits, so use 0o755 here.
		if err = os.MkdirAll(filepath.Dir(n), 0o755); err != nil {
			Log(err, "failed to create directory")
			continue
		}
		if err = os.Rename(p, n); err != nil {
			Log(err, "failed to move file")
		}
	}
	log.Println("finished")
}

// Log prints a tagged error without stopping the run.
func Log(err error, reason string) {
	log.Printf("[error] %s: %s", reason, err)
}
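Since base resolves to "..", the script is meant to run (e.g. with go run, whatever you name the file) from a subfolder sitting next to the assuncao directory. Moved duplicates keep their original directory layout under moved/; the folder names come from the constants above, so adjust them for your own tree.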