Skip to content

Instantly share code, notes, and snippets.

@caelifer
Last active August 31, 2015 22:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save caelifer/8ae9ce6cc8676554b845 to your computer and use it in GitHub Desktop.
Save caelifer/8ae9ce6cc8676554b845 to your computer and use it in GitHub Desktop.
/* The Computer Language Benchmarks Game
* http://benchmarksgame.alioth.debian.org/
*
* contributed by The Go Authors.
* modified by Tylor Arndt.
* modified by Chandra Sekar S to use optimized PCRE binding.
* modified by Timour Ezeev - use stdlib regexp package + precompile regexps
*/
package main
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
"runtime"
)
// variants holds the precompiled patterns whose match counts main reports,
// in the order they are printed.
var variants = func() []*regexp.Regexp {
	patterns := []string{
		"agggtaaa|tttaccct",
		"[cgt]gggtaaa|tttaccc[acg]",
		"a[act]ggtaaa|tttacc[agt]t",
		"ag[act]gtaaa|tttac[agt]ct",
		"agg[act]taaa|ttta[agt]cct",
		"aggg[acg]aaa|ttt[cgt]ccct",
		"agggt[cgt]aa|tt[acg]accct",
		"agggta[cgt]a|t[acg]taccct",
		"agggtaa[cgt]|[acg]ttaccct",
	}
	compiled := make([]*regexp.Regexp, len(patterns))
	for i, p := range patterns {
		compiled[i] = regexp.MustCompile(p)
	}
	return compiled
}()
// Subst pairs a compiled pattern with the bytes that replace each of its
// matches.
type Subst struct {
// rgx is the pattern to search for.
rgx *regexp.Regexp
// repl is the replacement handed to rgx.ReplaceAll.
repl []byte
}
// substs lists the single-letter codes and the alternation group that
// replaces each of them. main applies the entries in slice order.
var substs = []Subst{
	{rgx: regexp.MustCompile("B"), repl: []byte("(c|g|t)")},
	{rgx: regexp.MustCompile("D"), repl: []byte("(a|g|t)")},
	{rgx: regexp.MustCompile("H"), repl: []byte("(a|c|t)")},
	{rgx: regexp.MustCompile("K"), repl: []byte("(g|t)")},
	{rgx: regexp.MustCompile("M"), repl: []byte("(a|c)")},
	{rgx: regexp.MustCompile("N"), repl: []byte("(a|c|g|t)")},
	{rgx: regexp.MustCompile("R"), repl: []byte("(a|g)")},
	{rgx: regexp.MustCompile("S"), repl: []byte("(c|g)")},
	{rgx: regexp.MustCompile("V"), repl: []byte("(a|c|g)")},
	{rgx: regexp.MustCompile("W"), repl: []byte("(a|t)")},
	{rgx: regexp.MustCompile("Y"), repl: []byte("(c|t)")},
}
// ReadAll reads r line by line until EOF and returns the concatenated
// sequence data together with the total number of bytes consumed.
//
// Lines whose first byte is '>' (label/comment lines) are left out of the
// returned buffer, and the trailing '\n' of every kept line is stripped.
// The returned count covers every byte read, including skipped lines and
// newline characters. A final line that is not newline-terminated is
// processed like any other line rather than being dropped.
//
// On a read error other than io.EOF it returns (nil, 0, err).
func ReadAll(r io.Reader) ([]byte, int, error) {
	buf := make([]byte, 0, 1024)
	count := 0
	br := bufio.NewReader(r)
	for {
		line, err := br.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return nil, 0, err
		}

		// ReadBytes may hand back data together with io.EOF when the input
		// does not end in '\n'; that data still has to be counted and kept.
		if n := len(line); n > 0 {
			count += n
			if line[0] != '>' { // skip label/comment lines
				// strip the line terminator, if any
				if line[n-1] == '\n' {
					line = line[:n-1]
				}
				buf = append(buf, line...)
			}
		}

		if err == io.EOF {
			return buf, count, nil
		}
	}
}
// countMatches reports how many non-overlapping matches of rx occur in bytes.
func countMatches(rx *regexp.Regexp, bytes []byte) int {
	matches := rx.FindAllIndex(bytes, -1)
	return len(matches)
}
// main reads the whole input from stdin, counts the matches of every variant
// pattern in its own goroutine, applies the substitutions in a further
// goroutine, and prints one "pattern count" line per variant followed by the
// raw input length, the cleaned sequence length, and the length after all
// substitutions.
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())

	seq, inputLen, err := ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
		os.Exit(2)
	}
	cleanLen := len(seq)

	// One channel per variant so the counts can be collected in the same
	// order as the patterns are declared.
	counts := make([]chan int, 0, len(variants))
	for _, re := range variants {
		out := make(chan int)
		counts = append(counts, out)
		go func(out chan int, re *regexp.Regexp) {
			out <- countMatches(re, seq)
		}(out, re)
	}

	// The substitution pass runs on its own copy of the sequence while the
	// counting goroutines read the original.
	replacedLen := make(chan int, 1)
	scratch := append([]byte{}, seq...)
	go func() {
		for _, s := range substs {
			scratch = s.rgx.ReplaceAll(scratch, s.repl)
		}
		replacedLen <- len(scratch)
	}()

	for i, ch := range counts {
		fmt.Printf("%s %d\n", variants[i], <-ch)
	}
	fmt.Printf("\n%d\n%d\n%d\n", inputLen, cleanLen, <-replacedLen)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment