Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@Feh
Created October 26, 2012 21:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Feh/3961597 to your computer and use it in GitHub Desktop.
Save Feh/3961597 to your computer and use it in GitHub Desktop.
When the disk cache is full, grep(1) is single-threaded and CPU-bound. I tried implementing a simple "fgrep -IR" in Go using channels and one goroutine per file, but the program is an order of magnitude slower. Why?
package main
import (
	"bufio"
	"bytes"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime/pprof"
	"strings"
)
func lines(fn string) chan string {
c := make(chan string)
go func() {
defer close(c)
f, err := os.Open(fn)
if err != nil {
fmt.Fprintln(os.Stderr, "Cannot open", fn + ":", err)
return
}
defer f.Close()
data := make([]byte, 256)
count, err := f.Read(data)
isBinary := bytes.Contains(data[:count], []byte{0})
f.Seek(0, 0) // rewind
if isBinary {
return
}
bf := bufio.NewReader(f)
for {
line, isPrefix, err := bf.ReadLine()
if err == io.EOF {
return
}
if err == nil && !isPrefix {
c <- string(line)
}
}
}()
return c
}
// matches returns a channel producing every line of the file fn that
// contains substr, each prefixed with "fn: " (the same output shape as
// grep). The channel is closed once the file is exhausted, or immediately
// if lines skipped the file as binary or unreadable.
func matches(substr, fn string) chan string {
	c := make(chan string)
	go func() {
		defer close(c)
		for l := range lines(fn) {
			// l is already a string — the original's string(l) conversion
			// was redundant.
			if strings.Contains(l, substr) {
				c <- fn + ": " + l
			}
		}
	}()
	return c
}
func lsr(dir string) (c []string) {
c = make([]string, 0)
h, err := os.Open(dir)
if err != nil {
fmt.Fprintln(os.Stderr, "warning:", err)
return
}
defer h.Close()
entries, err := h.Readdirnames(0)
if err != nil {
fmt.Fprintln(os.Stderr, "warning:", err)
return
}
for _, entry := range entries {
e := dir + "/" + entry
info, err := os.Stat(e)
if err != nil {
fmt.Fprintln(os.Stderr, "warning: cannot stat("+e+"):", err)
continue
}
if info.IsDir() {
c = append(c, lsr(e)...)
} else {
c = append(c, e)
}
}
return
}
// fgrepr prints every line below dir that contains str, prefixed with the
// file name. Each file is scanned by its own goroutine (via matches); a
// feeder goroutine hands those result channels over in directory-listing
// order, and this function drains them one after another so the output
// stays grouped by file.
func fgrepr(str, dir string) {
	results := make(chan chan string)
	go func() {
		defer close(results)
		for _, file := range lsr(dir) {
			results <- matches(str, file)
		}
	}()
	for ch := range results {
		for line := range ch {
			fmt.Println(line)
		}
	}
}
// main parses flags, optionally enables CPU profiling, and runs the
// recursive fixed-string search: fgrep <pattern> <directory>.
func main() {
	var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
	flag.Parse()
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		// The original leaked f and ignored StartCPUProfile's error
		// (e.g. "cpu profiling already in use").
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		defer pprof.StopCPUProfile()
	}
	args := flag.Args()
	if len(args) != 2 {
		// Diagnostics belong on stderr so they don't mix with match output.
		fmt.Fprintln(os.Stderr, "Usage: fgrep <pattern> <directory>")
		os.Exit(1)
	}
	fgrepr(args[0], args[1])
}
@stevedomin
Copy link

Hello,
I think the bottleneck is the goroutines.
With a simpler approach I think I get better performance than grep/fgrep on a simple test case (matching a fixed string).
What do you think?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment