Skip to content

Instantly share code, notes, and snippets.

@dkorunic
Last active January 24, 2019 17:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dkorunic/bb80ef8990ae6d4aed191b6b3963b943 to your computer and use it in GitHub Desktop.
Save dkorunic/bb80ef8990ae6d4aed191b6b3963b943 to your computer and use it in GitHub Desktop.
File statistics tool in Golang
// @license
// Copyright (C) 2019 Dinko Korunic
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// Quick and dirty reimplementation, written for the "Sistemci Hrvatska" Facebook group, of the following shell pipeline:
// find . -type f -print0 | xargs -0 ls -l | awk '{size[int(log($5)/log(2))]++}END{for (i in size) printf("%10d %3d\n", 2^i, size[i])}' | sort -n
//
// Performance comparison results against test filesystem:
// $ hyperfine --warmup 3 ./test1.sh ./test2.sh
// Benchmark #1: ./test1.sh
// Time (mean ± σ): 1.380 s ± 0.006 s [User: 1.195 s, System: 0.717 s]
// Range (min … max): 1.369 s … 1.387 s
//
// Benchmark #2: ./test2.sh
// Time (mean ± σ): 367.6 ms ± 9.8 ms [User: 468.4 ms, System: 504.5 ms]
// Range (min … max): 353.1 ms … 382.2 ms
//
// Summary
// './test2.sh' ran
// 3.75 ± 0.10 times faster than './test1.sh'
package main
import (
"fmt"
"math"
"math/big"
"os"
"runtime"
"sort"
"sync"
"github.com/karrick/godirwalk"
)
// main walks the directory tree named by the first command-line argument and
// prints a histogram of regular-file sizes bucketed by power of two.
//
// Pipeline:
//   - the godirwalk callback sends the path of every regular file to chi;
//   - runtime.NumCPU() workers stat each path and send int(log2(size)) to cho;
//   - a single aggregator goroutine counts occurrences per bucket in a map.
//
// Files that cannot be stat'ed and files whose size is <= 0 are skipped, and
// nodes that produce a walk error are skipped via the ErrorCallback. Each
// output line is the bucket's lower bound (2^k) followed by the number of
// files in that bucket, sorted by ascending bucket.
//
// The process exits with status 1 if no directory argument is given or if the
// walk itself returns an error.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "Needs a directory path for processing.\n")
		os.Exit(1)
	}

	// chi carries file paths from the walker to the stat workers; cho carries
	// bucket exponents from the workers to the aggregator.
	chi := make(chan string, 512)
	cho := make(chan int, 512)

	var wg sync.WaitGroup
	for i := 0; i < runtime.NumCPU(); i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for p := range chi {
				fi, err := os.Stat(p)
				if err != nil {
					// Best effort: the file may have vanished or be
					// unreadable; it is simply left out of the histogram.
					continue
				}
				fs := fi.Size()
				if fs <= 0 {
					// log2 is undefined for 0, so empty files are not counted.
					continue
				}
				cho <- int(math.Log2(float64(fs)))
			}
		}()
	}

	// The aggregator is the only goroutine that touches s until done is
	// closed; main must not read s before that point.
	s := make(map[int]*big.Int)
	done := make(chan struct{})
	go func() {
		defer close(done)
		one := big.NewInt(1)
		for k := range cho {
			c, ok := s[k]
			if !ok {
				c = big.NewInt(0)
				s[k] = c
			}
			c.Add(c, one)
		}
	}()

	err := godirwalk.Walk(os.Args[1], &godirwalk.Options{
		Unsorted:            true,
		FollowSymbolicLinks: false,
		Callback: func(osPath string, de *godirwalk.Dirent) error {
			if de.IsRegular() {
				chi <- osPath
			}
			return nil
		},
		ErrorCallback: func(osPath string, err error) godirwalk.ErrorAction {
			return godirwalk.SkipNode
		},
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}

	// Shut the pipeline down in order: no more paths, wait for the workers to
	// finish sending, no more buckets, then wait for the aggregator to drain
	// everything still buffered in cho. Without the final wait the map could
	// be read while it is still being written, losing counts.
	close(chi)
	wg.Wait()
	close(cho)
	<-done

	// Map iteration order is random; sort the exponents for stable output.
	k := make([]int, 0, len(s))
	for tk := range s {
		k = append(k, tk)
	}
	sort.Ints(k)
	for _, v := range k {
		fmt.Printf("%10v %30v\n", int(math.Pow(2, float64(v))), s[v])
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment