Skip to content

Instantly share code, notes, and snippets.

@ezr
Last active August 22, 2021 22:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ezr/93a0bff83d423ce950d069a1b5ba56a1 to your computer and use it in GitHub Desktop.
Save ezr/93a0bff83d423ce950d069a1b5ba56a1 to your computer and use it in GitHub Desktop.
find duplicate files in the current directory
package main
import (
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"log"
"os"
)
// swapColor alternates between the two ANSI colour escape sequences used to
// tell consecutive groups of duplicates apart. Given the bright-green sequence
// it returns the bright-yellow one; for any other input it returns bright green.
func swapColor(color string) string {
	const (
		green  = "\033[92m"
		yellow = "\033[93m"
	)
	switch color {
	case green:
		return yellow
	default:
		return green
	}
}
// occurrences reports how many entries of slice have a size equal to val.
func occurrences(val int64, slice []fileInfo) int {
	n := 0
	for i := range slice {
		if slice[i].size != val {
			continue
		}
		n++
	}
	return n
}
// contains reports whether str is equal to at least one element of arr.
// An empty or nil arr never contains anything.
func contains(str string, arr []string) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == str {
			return true
		}
	}
	return false
}
// hashFileMD5 streams the file at filePath through MD5 and returns the digest
// as a lowercase hexadecimal string. If the file cannot be opened or read, it
// returns an empty string together with the underlying error.
func hashFileMD5(filePath string) (string, error) {
	f, err := os.Open(filePath)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// Stream the contents into the hasher so large files are never held in
	// memory all at once.
	digest := md5.New()
	_, err = io.Copy(digest, f)
	if err != nil {
		return "", err
	}

	// An MD5 digest is always 16 bytes, so Sum(nil) is the complete hash.
	return hex.EncodeToString(digest.Sum(nil)), nil
}
// printColor writes one "sum - file" line to standard output, preceded by the
// given colour escape sequence and followed by the ANSI reset sequence. The
// pieces are separated by single spaces.
func printColor(color, sum, file string) {
	const reset = "\033[0m"
	fmt.Printf("%s %s - %s %s\n", color, sum, file, reset)
}
// fileInfo holds what the program tracks for one directory entry while
// looking for duplicates. Values are comparable with == and !=.
type fileInfo struct {
// name is the entry's file name as reported by the directory listing.
name string
// size is the entry's size as reported by the directory listing.
size int64
// md5 is the hex-encoded MD5 digest of the file's contents; it stays empty
// until the file has been hashed.
md5 string
}
// main looks for duplicate files among the regular files of the current
// directory (non-recursive) and prints every group of identical files.
//
// Files are first bucketed by size; only files that share their size with at
// least one other file are hashed. Files with equal MD5 sums are reported as
// duplicates, one "sum - name" line per file, with the colour alternating
// between consecutive groups. Any error while listing the directory or hashing
// a file terminates the program via log.Fatal.
func main() {
	// would make sense to take a path as a parameter
	listdir, err := ioutil.ReadDir(".")
	if err != nil {
		log.Fatal(err)
	}

	// Keep regular files only. Directories cannot be hashed, reading a FIFO or
	// device may block forever, and for a symlink the listing reports the size
	// of the link itself rather than of its target, which would make the size
	// pre-filter below meaningless.
	sizeCount := make(map[int64]int, len(listdir))
	candidates := make([]fileInfo, 0, len(listdir))
	for _, entry := range listdir {
		if !entry.Mode().IsRegular() {
			continue
		}
		candidates = append(candidates, fileInfo{name: entry.Name(), size: entry.Size()})
		sizeCount[entry.Size()]++
	}

	// Hash only files whose size is shared with at least one other file (a file
	// with a unique size cannot have a duplicate) and group the names by sum.
	// order remembers the first appearance of each sum so that the output
	// follows the directory listing order instead of random map order.
	groups := make(map[string][]string)
	var order []string
	for _, candidate := range candidates {
		if sizeCount[candidate.size] < 2 {
			continue
		}
		sum, err := hashFileMD5(candidate.name)
		if err != nil {
			log.Fatal(err)
		}
		if _, seen := groups[sum]; !seen {
			order = append(order, sum)
		}
		groups[sum] = append(groups[sum], candidate.name)
	}

	color := "\033[92m"
	for _, sum := range order {
		names := groups[sum]
		if len(names) < 2 {
			// Same size but different content: not a duplicate.
			continue
		}
		// Print the later files of the group first and the first file last,
		// then switch colour so the next group stands out.
		for _, name := range names[1:] {
			printColor(color, sum, name)
		}
		printColor(color, sum, names[0])
		color = swapColor(color)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment