Skip to content

Instantly share code, notes, and snippets.

@vizee
Created September 17, 2018 17:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vizee/3db9157532617cc86c7c3c1a7006fa65 to your computer and use it in GitHub Desktop.
Save vizee/3db9157532617cc86c7c3c1a7006fa65 to your computer and use it in GitHub Desktop.
Find duplicate files
package main
import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
)
// Package-level configuration, filled in by main from the command-line flags
// and read by collectFile.
var (
// sumfns holds the digest functions applied to every file's content; each
// one returns a hex-encoded digest string.
sumfns []func([]byte) string
// recursive is set by the -r flag; when false, collectFile does not descend
// into directories below the path it was given.
recursive bool
// ignores holds the base names taken from the -i flag (split on ":");
// collectFile skips directories whose base name matches one of them.
ignores []string
)
// fileInfo pairs a file found by collectFile with its fingerprint.
type fileInfo struct {
// name is the path of the file as reported by the directory walk.
name string
// fps is the fingerprint string: the file size followed by the output of
// each function in sumfns, joined with ":".
fps string
}
// sha1sum returns the SHA-1 digest of data as a lowercase hexadecimal string.
func sha1sum(data []byte) string {
	digest := sha1.Sum(data)
	// Encode into a buffer sized for the hex form of a SHA-1 digest.
	encoded := make([]byte, hex.EncodedLen(sha1.Size))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}
// md5sum returns the MD5 digest of data as a lowercase hexadecimal string.
func md5sum(data []byte) string {
	digest := md5.Sum(data)
	// Encode into a buffer sized for the hex form of an MD5 digest.
	encoded := make([]byte, hex.EncodedLen(md5.Size))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}
// sha256sum returns the SHA-256 digest of data as a lowercase hexadecimal
// string.
func sha256sum(data []byte) string {
	digest := sha256.Sum256(data)
	// Encode into a buffer sized for the hex form of a SHA-256 digest.
	encoded := make([]byte, hex.EncodedLen(sha256.Size))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}
// collectFile walks the file tree rooted at prefix and returns one fileInfo
// for every non-directory entry it visits.
//
// Each entry's fingerprint is the file size followed by the digest produced
// by every function in sumfns, joined with ":". A directory whose base name
// appears in ignores is pruned, and directories below prefix are entered only
// when recursive is set. The ignore list is applied to directories only, not
// to individual files.
//
// Any error reported by the walk itself (for example prefix does not exist or
// a directory cannot be listed) or by reading a file aborts the walk and is
// returned; no partial result is returned in that case.
func collectFile(prefix string) ([]*fileInfo, error) {
	var fis []*fileInfo
	err := filepath.Walk(prefix, func(path string, fi os.FileInfo, err error) error {
		// Walk passes a non-nil err when it could not stat path or list a
		// directory, and fi may be nil then. Bail out before touching fi.
		if err != nil {
			return err
		}
		if fi.IsDir() {
			// Without -r only the root directory itself is entered.
			if path != prefix && !recursive {
				return filepath.SkipDir
			}
			basename := fi.Name()
			for _, ignore := range ignores {
				if basename == ignore {
					return filepath.SkipDir
				}
			}
			return nil
		}
		data, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}
		sums := make([]string, 0, len(sumfns)+1)
		// FormatInt keeps the full int64 size; converting to int would
		// truncate large files on 32-bit platforms.
		sums = append(sums, strconv.FormatInt(fi.Size(), 10))
		for _, fn := range sumfns {
			sums = append(sums, fn(data))
		}
		fis = append(fis, &fileInfo{
			name: path,
			fps:  strings.Join(sums, ":"),
		})
		return nil
	})
	if err != nil {
		return nil, err
	}
	return fis, nil
}
// main parses the command line, fingerprints the files under every path
// argument, and prints one line per group of files that share a fingerprint:
// the fingerprint followed by the file names, separated by spaces.
//
// Flags: -md5 (default on), -sha1 and -sha256 select the digests that make up
// the fingerprint; -r descends into subdirectories; -i takes a ":"-separated
// list of directory base names to skip. With no path arguments the usage text
// is printed and the program exits with status 1.
func main() {
	var (
		algmd5    bool
		algsha1   bool
		algsha256 bool
		ignore    string
	)
	flag.BoolVar(&algmd5, "md5", true, "md5")
	flag.BoolVar(&algsha1, "sha1", false, "sha1")
	flag.BoolVar(&algsha256, "sha256", false, "sha256")
	flag.BoolVar(&recursive, "r", false, "recursive")
	flag.StringVar(&ignore, "i", "", "ignore")
	flag.Parse()
	if flag.NArg() == 0 {
		fmt.Fprintf(flag.CommandLine.Output(), "dupfile [options] <file>...\n")
		flag.PrintDefaults()
		os.Exit(1)
	}
	ignores = strings.Split(ignore, ":")
	if algmd5 {
		sumfns = append(sumfns, md5sum)
	}
	if algsha1 {
		sumfns = append(sumfns, sha1sum)
	}
	if algsha256 {
		sumfns = append(sumfns, sha256sum)
	}

	// db maps a fingerprint to every file name that produced it.
	db := map[string][]string{}
	for _, path := range flag.Args() {
		l, err := collectFile(path)
		if err != nil {
			// Report the failure instead of dropping it silently, then carry
			// on with the remaining paths.
			fmt.Fprintf(os.Stderr, "dupfile: skipping %s: %v\n", path, err)
			continue
		}
		for _, fi := range l {
			db[fi.fps] = append(db[fi.fps], fi.name)
		}
	}

	// Only fingerprints shared by two or more files are duplicates.
	for fps, names := range db {
		if len(names) <= 1 {
			continue
		}
		fmt.Printf("%s", fps)
		for _, name := range names {
			fmt.Printf(" %s", name)
		}
		fmt.Println()
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment