Skip to content

Instantly share code, notes, and snippets.

@marklap
Last active July 18, 2018 22:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save marklap/e19f4aae2b78b42ab162 to your computer and use it in GitHub Desktop.
Save marklap/e19f4aae2b78b42ab162 to your computer and use it in GitHub Desktop.
Rsync-lite-ish script that uses parallel goroutines to speed things up a bit.
////////////////////////////////////////////////////////////////////////////////
// The MIT License (MIT)
//
// Copyright (c) 2015 Mark LaPerriere
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////
package main
import (
"crypto/md5"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
)
// entry is one unit of work handed to the worker goroutines: a single path
// found while walking the source tree, plus the outcome of syncing it.
// NOTE: main builds entries with a positional literal, so field order matters.
type entry struct {
// path is the source path exactly as passed to the filepath.Walk callback.
path string
// info is the FileInfo passed to the filepath.Walk callback for path.
info os.FileInfo
// err starts as the error passed to the walk callback and is overwritten by
// syncEntry when syncing this entry fails.
err error
}
// md5sum streams the file at file_path through MD5 and returns the digest as a
// lowercase hex string. The digest is also echoed to stdout with an "MD5SUM:"
// prefix. If the file cannot be opened or read, the empty string is returned
// and nothing is printed. The info argument is accepted but not used.
func md5sum(file_path string, info os.FileInfo) string {
	fh, openErr := os.Open(file_path)
	if openErr != nil {
		return ""
	}
	defer fh.Close()

	digest := md5.New()
	_, readErr := io.Copy(digest, fh)
	if readErr != nil {
		return ""
	}

	hexSum := fmt.Sprintf("%x", digest.Sum(nil))
	fmt.Println("MD5SUM:", hexSum)
	return hexSum
}
// copyFile copies the contents of the file at src_path to dst_path, creating
// dst_path if it does not exist and truncating it if it does.
// Both src_path and dst_path are full paths to the source and destination files.
//
// The source is opened before the destination is touched, so a missing or
// unreadable source never creates an empty destination or wipes an existing
// one. The returned error is the first failure among opening the source,
// creating the destination, copying the data and closing the destination.
func copyFile(src_path string, dst_path string) (err error) {
	src, err := os.Open(src_path)
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := os.Create(dst_path)
	if err != nil {
		return err
	}
	// A failed Close on a written file can mean data never reached disk, so
	// surface it unless an earlier error is already being returned.
	defer func() {
		if closeErr := dst.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(dst, src)
	return err
}
// syncEntry brings the destination copy of this entry up to date with its
// source and records any failure in e.err.
//
// src_path is the root of the source tree and dst_path is the root of the
// destination tree. The destination path is derived by stripping src_path from
// the front of e.path and joining the remainder onto dst_path, which preserves
// the source directory structure.
//
// Directories are simply created at the destination. For regular files one
// status line is printed to stdout:
//
//	+++  destination did not exist and was copied
//	!!!  sizes differed and the file was re-copied
//	~~~  sizes matched but MD5 checksums differed, file was re-copied
//	===  source and destination are already identical
//
// Entries that arrive with e.err already set (for example a walk error) are
// left untouched so the caller can report that error.
func (e *entry) syncEntry(src_path string, dst_path string) {
	if e.err != nil {
		return
	}
	// The walk can hand over a nil FileInfo; without it nothing below is safe.
	if e.info == nil {
		e.err = fmt.Errorf("no file info available for %q", e.path)
		return
	}

	relPath := strings.TrimPrefix(e.path, src_path)
	dstFullPath := filepath.Join(dst_path, relPath)

	// just letting us know what we're doing - i.e. debug info
	fmt.Println("src:", e.path, "---> dst:", dstFullPath)

	// A directory only needs to exist at the destination; it must not fall
	// through to the size/checksum/copy logic, which is meant for files.
	if e.info.IsDir() {
		e.err = os.MkdirAll(dstFullPath, 0777)
		return
	}

	// make sure the directory that will hold the file exists
	if err := os.MkdirAll(filepath.Dir(dstFullPath), 0777); err != nil {
		e.err = err
		return
	}

	// Stat instead of Open: only metadata is needed here, and no handle is
	// left open on the destination while it is being overwritten.
	dstInfo, err := os.Stat(dstFullPath)
	switch {
	case os.IsNotExist(err):
		fmt.Println("+++", e.path)
		e.err = copyFile(e.path, dstFullPath)
		return
	case err != nil:
		e.err = err
		return
	}

	if e.info.Size() != dstInfo.Size() {
		fmt.Println("!!!", e.path)
		e.err = copyFile(e.path, dstFullPath)
		return
	}

	// md5sum returns "" when it cannot read a file. Treat an unreadable
	// source as an error rather than letting two failed checksums compare
	// equal and report the files as identical.
	srcSum := md5sum(e.path, e.info)
	if srcSum == "" {
		e.err = fmt.Errorf("cannot checksum source %q", e.path)
		return
	}
	if srcSum != md5sum(dstFullPath, dstInfo) {
		fmt.Println("~~~", e.path)
		e.err = copyFile(e.path, dstFullPath)
		return
	}

	// should be binary identical
	fmt.Println("===", e.path)
}
// syncPath is the body of each worker goroutine. It pulls entries off queue
// until the channel is closed, syncs each one from src_path to dst_path,
// prints any resulting error with a "**ERROR:" prefix, and marks the entry
// finished on wg.
func syncPath(src_path string, dst_path string, queue chan *entry, wg *sync.WaitGroup) {
	for {
		item, open := <-queue
		if !open {
			return
		}

		item.syncEntry(src_path, dst_path)
		if failure := item.err; failure != nil {
			fmt.Println("**ERROR:", failure)
		}
		wg.Done()
	}
}
// main walks the source tree and fans every path out to a pool of worker
// goroutines that sync it into the destination tree.
//
// Usage: prog [src_path dst_path]
// With no arguments the built-in default paths are used; with exactly two
// arguments they replace the defaults. Any other argument count prints a
// usage message and exits with status 2.
func main() {
	// default source and destination roots
	src_path := `C:\workspace\dev\temp\linuxmig`
	dst_path := `C:\workspace\dev\temp\linuxmig.copy`
	switch len(os.Args) {
	case 1:
		// no arguments: keep the defaults
	case 3:
		src_path, dst_path = os.Args[1], os.Args[2]
	default:
		fmt.Fprintf(os.Stderr, "usage: %s [src_path dst_path]\n", os.Args[0])
		os.Exit(2)
	}

	// want twice as many goroutines as we have cores... lots of io wait going so it shouldn't destroy the system
	workers := runtime.NumCPU() * 2

	// work queue
	queue := make(chan *entry)

	// counts entries that have been queued but not yet processed
	var wg sync.WaitGroup

	// start up the goroutines
	for i := 0; i < workers; i++ {
		go syncPath(src_path, dst_path, queue, &wg)
	}

	// fill up the work queue with work
	walkErr := filepath.Walk(src_path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Report and keep walking: returning err here would abort the
			// whole walk, and info may be nil, so there is nothing to sync.
			fmt.Println("**ERROR:", err)
			return nil
		}
		wg.Add(1)
		queue <- &entry{path: path, info: info}
		return nil
	})

	// no more work is coming; closing the queue lets the workers exit
	close(queue)

	// wait for the queued entries to finish
	wg.Wait()

	if walkErr != nil {
		fmt.Println("**ERROR:", walkErr)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment