Skip to content

Instantly share code, notes, and snippets.

@alexniver
Last active July 24, 2016 12:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexniver/5f343b88b9f110cf298eeb7c89dc1f0b to your computer and use it in GitHub Desktop.
Save alexniver/5f343b88b9f110cf298eeb7c89dc1f0b to your computer and use it in GitHub Desktop.
Edit from https://blog.golang.org/pipelines, fan in fan out. show use time.
package main
import (
"crypto/md5"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
//"sort"
"sync"
"time"
)
// walkFiles starts a goroutine to walk the directory tree at root and send the
// path of each regular file on the string channel. It sends the result of the
// walk on the error channel. If done is closed, walkFiles abandons its work.
func walkFiles(done <-chan struct{}, root string) (<-chan string, <-chan error) {
paths := make(chan string)
errc := make(chan error, 1)
go func() { // HL
// Close the paths channel after Walk returns.
defer close(paths) // HL
// No select needed for this send, since errc is buffered.
errc <- filepath.Walk(root, func(path string, info os.FileInfo, err error) error { // HL
if err != nil {
return err
}
if !info.Mode().IsRegular() {
return nil
}
select {
case paths <- path: // HL
case <-done: // HL
return errors.New("walk canceled")
}
return nil
})
}()
return paths, errc
}
// A result is the product of reading and summing a file using MD5.
type result struct {
path string
sum [md5.Size]byte
err error
}
// digester reads path names from paths and sends digests of the corresponding
// files on c until either paths or done is closed.
func digester(done <-chan struct{}, paths <-chan string, c chan<- result) {
for path := range paths { // HLpaths
data, err := ioutil.ReadFile(path)
select {
case c <- result{path, md5.Sum(data), err}:
case <-done:
return
}
}
}
// MD5All reads all the files in the file tree rooted at root and returns a map
// from file path to the MD5 sum of the file's contents. If the directory walk
// fails or any read operation fails, MD5All returns an error. In that case,
// MD5All does not wait for inflight read operations to complete.
func MD5All(root string) (map[string][md5.Size]byte, error) {
// MD5All closes the done channel when it returns; it may do so before
// receiving all the values from c and errc.
done := make(chan struct{})
defer close(done)
paths, errc := walkFiles(done, root)
// Start a fixed number of goroutines to read and digest files.
c := make(chan result) // HLc
var wg sync.WaitGroup
const numDigesters = 20
wg.Add(numDigesters)
for i := 0; i < numDigesters; i++ {
go func() {
digester(done, paths, c) // HLc
wg.Done()
}()
}
go func() {
wg.Wait()
close(c) // HLc
}()
// End of pipeline. OMIT
m := make(map[string][md5.Size]byte)
for r := range c {
if r.err != nil {
return nil, r.err
}
m[r.path] = r.sum
}
// Check whether the Walk failed.
if err := <-errc; err != nil { // HLerrc
return nil, err
}
return m, nil
}
func main() {
// Calculate the MD5 sum of all files under the specified directory,
// then print the results sorted by path name.
timeStart := time.Now()
m, err := MD5All("./")
if err != nil {
fmt.Println(err)
return
}
var paths []string
for path := range m {
paths = append(paths, path)
}
fmt.Println("md5Time : ", time.Now().Sub(timeStart))
}
package main
import (
"crypto/md5"
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
//"sort"
"sync"
"time"
)
// A result is the product of reading and summing a file using MD5.
type result struct {
path string
sum [md5.Size]byte
err error
}
// sumFiles starts goroutines to walk the directory tree at root and digest each
// regular file. These goroutines send the results of the digests on the result
// channel and send the result of the walk on the error channel. If done is
// closed, sumFiles abandons its work.
func sumFiles(done <-chan struct{}, root string) (<-chan result, <-chan error) {
// For each regular file, start a goroutine that sums the file and sends
// the result on c. Send the result of the walk on errc.
c := make(chan result)
errc := make(chan error, 1)
go func() { // HL
var wg sync.WaitGroup
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.Mode().IsRegular() {
return nil
}
wg.Add(1)
go func() { // HL
data, err := ioutil.ReadFile(path)
select {
case c <- result{path, md5.Sum(data), err}: // HL
case <-done: // HL
}
wg.Done()
}()
// Abort the walk if done is closed.
select {
case <-done: // HL
return errors.New("walk canceled")
default:
return nil
}
})
// Walk has returned, so all calls to wg.Add are done. Start a
// goroutine to close c once all the sends are done.
go func() { // HL
wg.Wait()
close(c) // HL
}()
// No select needed here, since errc is buffered.
errc <- err // HL
}()
return c, errc
}
// MD5All reads all the files in the file tree rooted at root and returns a map
// from file path to the MD5 sum of the file's contents. If the directory walk
// fails or any read operation fails, MD5All returns an error. In that case,
// MD5All does not wait for inflight read operations to complete.
func MD5All(root string) (map[string][md5.Size]byte, error) {
// MD5All closes the done channel when it returns; it may do so before
// receiving all the values from c and errc.
done := make(chan struct{}) // HLdone
defer close(done) // HLdone
c, errc := sumFiles(done, root) // HLdone
m := make(map[string][md5.Size]byte)
for r := range c { // HLrange
if r.err != nil {
return nil, r.err
}
m[r.path] = r.sum
}
if err := <-errc; err != nil {
return nil, err
}
return m, nil
}
func main() {
// Calculate the MD5 sum of all files under the specified directory,
// then print the results sorted by path name.
timeStart := time.Now()
m, err := MD5All("./")
if err != nil {
fmt.Println(err)
return
}
var paths []string
for path := range m {
paths = append(paths, path)
}
fmt.Println("md5 use time :", time.Now().Sub(timeStart))
}
package main
import (
"crypto/md5"
"fmt"
"io/ioutil"
"os"
"path/filepath"
//"sort"
"time"
)
// MD5All reads all the files in the file tree rooted at root and returns a map
// from file path to the MD5 sum of the file's contents. If the directory walk
// fails or any read operation fails, MD5All returns an error.
func MD5All(root string) (map[string][md5.Size]byte, error) {
m := make(map[string][md5.Size]byte)
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { // HL
if err != nil {
return err
}
if !info.Mode().IsRegular() {
return nil
}
data, err := ioutil.ReadFile(path) // HL
if err != nil {
return err
}
m[path] = md5.Sum(data) // HL
return nil
})
if err != nil {
return nil, err
}
return m, nil
}
func main() {
// Calculate the MD5 sum of all files under the specified directory,
// then print the results sorted by path name.
timeStart := time.Now()
m, err := MD5All("./") // HL
if err != nil {
fmt.Println(err)
return
}
var paths []string
for path := range m {
paths = append(paths, path)
}
fmt.Println("md5 time use : ", time.Now().Sub(timeStart))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment