Skip to content

Instantly share code, notes, and snippets.

@justinkinney
Forked from marklap/lapsync.go
Last active August 29, 2015 14:18
Show Gist options
  • Save justinkinney/9a4e5b60de2fb465a7b5 to your computer and use it in GitHub Desktop.
Save justinkinney/9a4e5b60de2fb465a7b5 to your computer and use it in GitHub Desktop.
package main
import _ "net/http/pprof"
import (
"bufio"
"crypto/md5"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"net/http"
"log"
)
// entry is a single item produced by the walk of the source tree and handed
// to a worker goroutine through the work queue.
//
// The field order matters: entries are built with an unkeyed composite literal.
type entry struct {
	path string      // path of the visited file or directory, as given to the walk callback
	info os.FileInfo // file info given to the walk callback for path
	err  error       // error given to the walk callback, or a failure recorded while syncing
}
// md5sum does an md5sum on the contents of the file provided
func md5sum(file_path string, info os.FileInfo) string {
var result string
file, err := os.Open(file_path)
if err != nil {
return result
}
defer file.Close()
reader := bufio.NewReader(file)
hash := md5.New()
if _, err := io.Copy(hash, reader); err != nil {
return result
}
result = fmt.Sprintf("%x", hash.Sum(nil))
// fmt.Println("MD5SUM: ", file_path, " : ", result)
return result
}
// copyFile copies the contents of the file at src_path to dst_path, creating
// the destination or truncating it if it already exists. Both arguments are
// full paths to the files themselves, not to their directories.
//
// The source is opened before the destination is touched, so a missing or
// unreadable source never truncates an existing destination or leaves an
// empty file behind. The returned error is the first failure among opening
// the source, creating the destination, copying, and closing the destination.
func copyFile(src_path string, dst_path string) (err error) {
	sf, err := os.Open(src_path)
	if err != nil {
		return err
	}
	defer sf.Close()

	df, err := os.Create(dst_path)
	if err != nil {
		return err
	}
	// Closing a written file can report a deferred write failure; surface it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := df.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(df, sf)
	return err
}
// syncEntry makes the destination counterpart of e match its source.
//
// src_path points at the root of the source tree and dst_path at the root of
// the destination tree. The destination for e is dst_path joined with what
// remains of e.path after the src_path prefix is removed, so the source
// directory structure is reproduced under the destination root.
//
// Directories are only created. A file is copied when the destination does
// not exist, when the sizes differ, or when the md5 checksums differ;
// otherwise it is left alone. Any failure is stored in e.err; an error already
// present in e.err is kept unless a new failure replaces it.
func (e *entry) syncEntry(src_path *string, dst_path *string) {
	// An entry can arrive without file info when the walk failed to stat it;
	// there is nothing to sync and e.err already carries the reason.
	if e.info == nil {
		return
	}

	relPath := strings.TrimPrefix(e.path, *src_path)
	dstFullPath := filepath.Join(*dst_path, relPath)

	// A directory only needs to exist on the destination side; it must not go
	// through the size/checksum/copy logic meant for files.
	if e.info.IsDir() {
		if err := os.MkdirAll(dstFullPath, 0777); err != nil {
			e.err = err
		}
		return
	}

	// Make sure the directory that will hold the file exists.
	if err := os.MkdirAll(filepath.Dir(dstFullPath), 0777); err != nil {
		e.err = err
		return
	}

	// Stat rather than open the destination: no handle is held while the file
	// is being overwritten below.
	dstInfo, err := os.Stat(dstFullPath)
	if err != nil && !os.IsNotExist(err) {
		e.err = err
		return
	}

	// The size comparison is cheap and short-circuits the checksum, which
	// reads both files in full.
	if err == nil &&
		e.info.Size() == dstInfo.Size() &&
		md5sum(e.path, e.info) == md5sum(dstFullPath, dstInfo) {
		return
	}

	// Destination is missing or differs: (re)copy and report a copy failure.
	if err := copyFile(e.path, dstFullPath); err != nil {
		e.err = err
	}
}
// syncPath is the body of one worker goroutine. It receives entries from
// queue until the channel is closed, syncs each one from the tree rooted at
// src_path into the tree rooted at dst_path, prints any error recorded on the
// entry, and marks the entry as finished on wg.
func syncPath(src_path *string, dst_path *string, queue chan *entry, wg *sync.WaitGroup) {
	for {
		item, open := <-queue
		if !open {
			return
		}

		item.syncEntry(src_path, dst_path)
		if failure := item.err; failure != nil {
			fmt.Println("**ERROR:", failure)
		}
		wg.Done()
	}
}
// parseArgs parses the command line and validates the source and destination
// flags. If either is empty it prints a message and exits with status 1;
// otherwise it echoes both directories.
func parseArgs() {
	flag.Parse()

	switch {
	case *src_path == "":
		fmt.Println("Please provide a source directory to sync")
		os.Exit(1)
	case *dst_path == "":
		fmt.Println("Please provide a dest directory to sync")
		os.Exit(1)
	}

	fmt.Println("source directory is: ", *src_path)
	fmt.Println("dest directory is: ", *dst_path)
}
// Command-line flags. Both default to the empty string.
var (
	// src_path is the value of -source: the root of the tree to sync from.
	src_path = flag.String("source", "", "source directory")
	// dst_path is the value of -dest: the root of the tree to sync into.
	dst_path = flag.String("dest", "", "destination directory")
)
// main parses the command line, starts the profiling HTTP listener and a pool
// of worker goroutines, walks the source tree handing every visited entry to
// the workers, and waits until all queued entries have been processed.
//
// If the walk itself fails (for example the source root cannot be read), the
// error is printed after the already-queued work has finished and the process
// exits with status 1.
func main() {
	parseArgs()

	// Serves the default mux on localhost:6060; the blank import of
	// net/http/pprof exposes the profiling endpoints there.
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()

	// Twice as many workers as cores: the work is mostly I/O wait, so the
	// extra goroutines should not overload the system.
	goRoutines := runtime.NumCPU() * 2

	// Work queue shared by all workers.
	queue := make(chan *entry)

	// Counts entries that have been queued but not yet processed.
	var wg sync.WaitGroup

	for i := 0; i < goRoutines; i++ {
		go syncPath(src_path, dst_path, queue, &wg)
	}

	// Fill the work queue.
	walkErr := filepath.Walk(*src_path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// info may be nil when the walk reports an error, so such an
			// entry must not reach a worker. Returning the error stops the
			// walk; it is reported once below.
			return err
		}
		wg.Add(1)
		queue <- &entry{path: path, info: info}
		return nil
	})

	// No more work will be queued: let the workers leave their receive loops,
	// then wait for the entries still being processed.
	close(queue)
	wg.Wait()

	if walkErr != nil {
		fmt.Println("**ERROR:", walkErr)
		os.Exit(1)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment