Skip to content

Instantly share code, notes, and snippets.

@Psykar
Created March 19, 2022 08:52
Show Gist options
  • Save Psykar/0f4f74cf0216cf70a5218d984e72719e to your computer and use it in GitHub Desktop.
Save Psykar/0f4f74cf0216cf70a5218d984e72719e to your computer and use it in GitHub Desktop.
// Package dupes is a small, rough-and-ready tool that finds duplicate files and
// merges them back into their originals.
// It was written after a reinstall of Google Drive sync created a large number
// of duplicate files on Google Drive; they had to be cleaned up to reclaim the
// space, and there was no way to do that automatically.
package dupes
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"regexp"
"strings"
)
// Dupes walks dir looking for entries whose base name looks like a duplicate
// created by a sync client: "name (N)" or "name (N).ext", where N is a single
// digit and ext is one to four lowercase letters. Each such entry is reconciled
// with its presumed original ("name" / "name.ext") in the same directory:
//
//   - if the original cannot be stat'ed, the duplicate is only recorded;
//   - if both are directories, the duplicate is merged via mergeDirectory;
//   - if both are files, the duplicate is handled by deleteIfEqual;
//   - if one is a directory and the other is not, the walk is aborted.
//
// At most limit matching entries are examined; meeting one more aborts the walk
// with a "<limit> files exceeded" error before that entry is touched.
//
// The collected mismatches and duplicates without an original are printed
// before returning, even when the walk ended with an error. The returned error
// is whatever stopped the walk, or nil.
func Dupes(dir string, limit int) error {
	// Anchored on both ends: an unanchored match would silently drop whatever
	// follows the matched part when the original name is rebuilt below
	// (e.g. "song (1).mp3" would be mapped to "song.mp").
	m, err := regexp.Compile(`^(.*) \([0-9]\)(\.[a-z][a-z]?[a-z]?[a-z]?)?$`)
	if err != nil {
		return err
	}
	mover := dupeMover{
		os: realMover{},
	}
	count := 0
	err = filepath.WalkDir(dir, func(pathn string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			// The entry could not be stat'ed or listed; d may be nil here.
			return walkErr
		}
		// pathn comes from filepath.WalkDir and uses the OS separator, so it
		// must be taken apart with filepath rather than path.
		parent, basename := filepath.Split(pathn)
		match := m.FindStringSubmatch(basename)
		if len(match) == 0 {
			return nil
		}
		count++
		// Enforce the cap before doing any work so that no more than limit
		// entries are ever modified.
		if count > limit {
			return fmt.Errorf("%d files exceeded", limit)
		}
		// Find the original: the same name with the " (N)" marker removed.
		origPath := filepath.Join(parent, strings.Join(match[1:], ""))
		origStat, err := os.Stat(origPath)
		if err != nil {
			mover.noorigs = append(mover.noorigs, pathn)
			return nil
		}
		stat, err := os.Stat(pathn)
		if err != nil {
			return err
		}
		if origStat.IsDir() != stat.IsDir() {
			return fmt.Errorf("stats don't match: %q vs %q", pathn, origPath)
		}
		if !stat.IsDir() {
			return mover.deleteIfEqual(pathn, origPath)
		}
		if err := mover.mergeDirectory(pathn, origPath); err != nil {
			return err
		}
		// The duplicate directory has just been merged away. Tell WalkDir not
		// to descend into it, otherwise reading the vanished directory fails
		// and aborts the whole walk. SkipDir is only returned for a real
		// directory entry: for anything else (e.g. a symlink to a directory)
		// it would skip the rest of the parent directory instead.
		if d.IsDir() {
			return fs.SkipDir
		}
		return nil
	})
	fmt.Println("====")
	fmt.Println("mismatches\n", strings.Join(mover.mismatches, "\n"))
	fmt.Println("====")
	fmt.Println("no origs:\n", strings.Join(mover.noorigs, "\n"))
	return err
}
// Mover is the set of file-system mutations the duplicate cleaner needs.
// Routing them through an interface lets the same logic run against the real
// file system (realMover) or as a print-only dry run (fakeMover).
type Mover interface {
	// Remove acts on a single entry identified by name.
	Remove(name string) error
	// RemoveAll acts on the directory identified by name.
	RemoveAll(name string) error
	// Rename moves the entry at source to dest.
	Rename(source, dest string) error
}
// dupeMover reconciles duplicates with their originals and keeps track of the
// cases it could not resolve so they can be reported afterwards.
type dupeMover struct {
	// os carries out the file-system mutations (remove / rename).
	os Mover

	// mismatches collects destination paths whose content hash differed from
	// the duplicate's; noorigs collects duplicate paths whose original could
	// not be stat'ed.
	mismatches, noorigs []string
}
// mergeDirectory folds the duplicate directory source into the directory dest.
//
// Every regular entry of source is passed to deleteIfEqual together with the
// path of the same name under dest. A sub-directory is merged recursively when
// dest already has an entry of that name; when it has none, the sub-directory
// is moved over in one rename. Finally source itself is removed through the
// Mover; with realMover that removal fails if anything (for example a file
// recorded as a mismatch) is still left inside.
//
// The receiver is a pointer so that mismatches recorded by deleteIfEqual during
// the merge end up on the caller's dupeMover rather than on a discarded copy.
//
// The first error encountered is returned and stops the merge.
func (m *dupeMover) mergeDirectory(source, dest string) error {
	entries, err := os.ReadDir(source)
	if err != nil {
		return err
	}
	for _, e := range entries {
		src := path.Join(source, e.Name())
		dst := path.Join(dest, e.Name())
		if !e.IsDir() {
			if err := m.deleteIfEqual(src, dst); err != nil {
				return err
			}
			continue
		}
		// Nothing to merge into: move the whole sub-directory. Recursing here
		// would try to rename its files into a parent that does not exist.
		if _, statErr := os.Stat(dst); errors.Is(statErr, fs.ErrNotExist) {
			if err := m.os.Rename(src, dst); err != nil {
				return err
			}
			continue
		}
		if err := m.mergeDirectory(src, dst); err != nil {
			return err
		}
	}
	return m.os.RemoveAll(source)
}
// deleteIfEqual reconciles the single duplicate source with dest.
//
//   - source cannot be stat'ed: that error is returned.
//   - dest cannot be stat'ed: source is renamed to dest.
//   - both exist with equal hashFile results: source is removed.
//   - both exist with different hashFile results: dest is appended to
//     m.mismatches, nothing is changed on disk and nil is returned.
func (m *dupeMover) deleteIfEqual(source, dest string) error {
	_, srcErr := os.Stat(source)
	if srcErr != nil {
		return srcErr
	}

	_, dstErr := os.Stat(dest)
	switch {
	case dstErr != nil:
		// No usable destination: move the duplicate into its place instead.
		return m.os.Rename(source, dest)
	case hashFile(source) == hashFile(dest):
		// Same content: the duplicate is redundant.
		return m.os.Remove(source)
	default:
		// Different content: keep both and report it later.
		m.mismatches = append(m.mismatches, dest)
		return nil
	}
}
// hashFile returns the SHA-256 digest of the file's contents as a lowercase
// hexadecimal string. The file is streamed through the hash rather than read
// into memory. It panics if the file cannot be opened or read.
func hashFile(filename string) string {
	file, openErr := os.Open(filename)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	digest := sha256.New()
	_, copyErr := io.Copy(digest, file)
	if copyErr != nil {
		panic(copyErr)
	}

	sum := digest.Sum(nil)
	return fmt.Sprintf("%x", sum)
}
// fakeMover is a dry-run stand-in for realMover: each method only prints what
// would have been done and reports success without touching the file system.
type fakeMover struct{}

// Remove prints the path that would be removed and returns nil.
func (fakeMover) Remove(target string) error {
	fmt.Println("removed:: ", target)
	return nil
}

// RemoveAll prints the directory that would be removed and returns nil.
func (fakeMover) RemoveAll(target string) error {
	fmt.Println("removed directory:: ", target)
	return nil
}

// Rename prints the move that would be performed and returns nil.
func (fakeMover) Rename(source, dest string) error {
	fmt.Println("renamed:: ", source, " :=>: ", dest)
	return nil
}
// realMover performs the operations on the actual file system, printing each
// one before it is attempted.
type realMover struct{}

// Remove prints the path and deletes it with os.Remove.
func (realMover) Remove(target string) error {
	fmt.Println("removed:: ", target)
	return os.Remove(target)
}

// RemoveAll prints the path and removes the directory, but only when it is
// empty: a directory that still has entries is left alone and an error is
// returned, as is any error from listing the directory.
func (realMover) RemoveAll(target string) error {
	fmt.Println("removed directory:: ", target)

	entries, err := os.ReadDir(target)
	switch {
	case err != nil:
		return err
	case len(entries) != 0:
		// Safety net: never delete content that has not been dealt with.
		return errors.New("tried to remove non empty dir")
	}
	return os.RemoveAll(target)
}

// Rename prints the move and performs it with os.Rename.
func (realMover) Rename(source, dest string) error {
	fmt.Println("renamed:: ", source, " :=>: ", dest)
	return os.Rename(source, dest)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment