Skip to content

Instantly share code, notes, and snippets.

@BurntSushi
Created May 16, 2018 14:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BurntSushi/9d35258444fda83de208d31fdd5e9e69 to your computer and use it in GitHub Desktop.
Save BurntSushi/9d35258444fda83de208d31fdd5e9e69 to your computer and use it in GitHub Desktop.
/* The Computer Language Benchmarks Game
* https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
*
* regex-redux program contributed by BurntSushi.
*/
package main
import (
"fmt"
"io/ioutil"
"os"
"runtime"
"github.com/BurntSushi/rure-go"
)
// variants lists the regex patterns whose match counts main reports,
// one output line per pattern, in this order.
var variants = []string{
	"agggtaaa|tttaccct",
	"[cgt]gggtaaa|tttaccc[acg]",
	"a[act]ggtaaa|tttacc[agt]t",
	"ag[act]gtaaa|tttac[agt]ct",
	"agg[act]taaa|ttta[agt]cct",
	"aggg[acg]aaa|ttt[cgt]ccct",
	"agggt[cgt]aa|tt[acg]accct",
	"agggta[cgt]a|t[acg]taccct",
	"agggtaa[cgt]|[acg]ttaccct",
}
// Subst pairs a regex pattern with the literal text that replaces each
// of its matches.
type Subst struct {
	pat, repl string
}

// substs is the ordered list of substitutions main applies to the
// sequence. Order matters: each substitution runs on the output of the
// previous one, and the later patterns match the "<n>" and "|" markers
// that the earlier replacements insert.
var substs = []Subst{
	{"tHa[Nt]", "<4>"},
	{"aND|caN|Ha[DS]|WaS", "<3>"},
	{"a[NSt]|BY", "<2>"},
	{"<[^>]*>", "|"},
	{"\\|[^|][^|]*\\|", "-"},
}
// countMatches compiles pat and returns the number of matches the
// resulting regex iterator yields over bytes. It panics (via
// rure.MustCompile) if pat is not a valid pattern.
func countMatches(pat string, bytes []byte) int {
	re := rure.MustCompile(pat)
	it := re.IterBytes(bytes)

	total := 0
	for it.Next(nil) {
		total++
	}
	return total
}
// replaceAll returns a new slice in which every match of re in src is
// replaced by repl. src itself is not modified. The text between
// matches, and after the last match, is copied through unchanged.
func replaceAll(re *rure.Regex, src []byte, repl []byte) []byte {
	// Start with capacity for the whole input; append grows the buffer
	// if the replacements make the result longer than src.
	out := make([]byte, 0, len(src))

	// prev is the offset in src just past the previous match.
	prev := 0
	it := re.IterBytes(src)
	for it.Next(nil) {
		start, end := it.Match()
		out = append(out, src[prev:start]...) // text before this match
		out = append(out, repl...)
		prev = end
	}

	// Copy whatever follows the final match.
	out = append(out, src[prev:]...)
	return out
}
func main() {
runtime.GOMAXPROCS(runtime.NumCPU())
bytes, err := ioutil.ReadFile("/dev/stdin")
if err != nil {
fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
os.Exit(2)
}
ilen := len(bytes)
// Delete the comment lines and newlines
bytes = replaceAll(rure.MustCompile("(>[^\n]+)?\n"), bytes, []byte{})
clen := len(bytes)
mresults := make([]chan int, len(variants))
for i, s := range variants {
ch := make(chan int)
mresults[i] = ch
go func(ss string) {
ch <- countMatches(ss, bytes)
}(s)
}
lenresult := make(chan int)
bb := bytes
go func() {
for _, sub := range substs {
bb = replaceAll(rure.MustCompile(sub.pat), bb, []byte(sub.repl))
}
lenresult <- len(bb)
}()
for i, s := range variants {
fmt.Printf("%s %d\n", s, <-mresults[i])
}
fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, <-lenresult)
}
[andrew@Cheetah benchgame] time bench-native < /tmp/input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361
real 0m20.709s
user 1m19.839s
sys 0m0.183s
[andrew@Cheetah benchgame] time bench-pcre < /tmp/input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361
real 0m10.510s
user 0m35.362s
sys 0m0.211s
[andrew@Cheetah benchgame] time bench-rust < /tmp/input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361
real 0m2.235s
user 0m3.410s
sys 0m0.157s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment