Last active
August 31, 2015 22:47
-
-
Save caelifer/8ae9ce6cc8676554b845 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* The Computer Language Benchmarks Game
 * http://benchmarksgame.alioth.debian.org/
 *
 * contributed by The Go Authors.
 * modified by Tylor Arndt.
 * modified by Chandra Sekar S to use optimized PCRE binding.
 * modified by Timour Ezeev - use stdlib regexp package + precompile regexps
 */
package main | |
import ( | |
"bufio" | |
"fmt" | |
"io" | |
"os" | |
"regexp" | |
"runtime" | |
) | |
// variants holds the precompiled patterns whose non-overlapping matches are
// counted and reported by main, in this order. Each pattern is an alternation
// of two eight-letter motifs.
var variants = []*regexp.Regexp{
	regexp.MustCompile(`agggtaaa|tttaccct`),
	regexp.MustCompile(`[cgt]gggtaaa|tttaccc[acg]`),
	regexp.MustCompile(`a[act]ggtaaa|tttacc[agt]t`),
	regexp.MustCompile(`ag[act]gtaaa|tttac[agt]ct`),
	regexp.MustCompile(`agg[act]taaa|ttta[agt]cct`),
	regexp.MustCompile(`aggg[acg]aaa|ttt[cgt]ccct`),
	regexp.MustCompile(`agggt[cgt]aa|tt[acg]accct`),
	regexp.MustCompile(`agggta[cgt]a|t[acg]taccct`),
	regexp.MustCompile(`agggtaa[cgt]|[acg]ttaccct`),
}
// Subst pairs a compiled pattern with the replacement text substituted for
// each of its matches.
type Subst struct {
	// rgx is the pattern to search for.
	rgx *regexp.Regexp

	// repl is the replacement handed to rgx.ReplaceAll.
	repl []byte
}
var substs = []Subst{ | |
{regexp.MustCompile("B"), []byte("(c|g|t)")}, | |
{regexp.MustCompile("D"), []byte("(a|g|t)")}, | |
{regexp.MustCompile("H"), []byte("(a|c|t)")}, | |
{regexp.MustCompile("K"), []byte("(g|t)")}, | |
{regexp.MustCompile("M"), []byte("(a|c)")}, | |
{regexp.MustCompile("N"), []byte("(a|c|g|t)")}, | |
{regexp.MustCompile("R"), []byte("(a|g)")}, | |
{regexp.MustCompile("S"), []byte("(c|g)")}, | |
{regexp.MustCompile("V"), []byte("(a|c|g)")}, | |
{regexp.MustCompile("W"), []byte("(a|t)")}, | |
{regexp.MustCompile("Y"), []byte("(c|t)")}, | |
} | |
// ReadAll reads r line by line and returns the concatenated line contents
// with every line that starts with '>' skipped and the trailing '\n' of each
// kept line removed. The second result is the total number of bytes read from
// r, including skipped lines and newline characters.
//
// A final line that is not terminated by '\n' is handled like any other line
// rather than being dropped. On a read error other than io.EOF the returned
// slice is nil and the count is 0.
func ReadAll(r io.Reader) ([]byte, int, error) {
	buf := make([]byte, 0, 1024)
	count := 0
	br := bufio.NewReader(r)
	for {
		line, err := br.ReadBytes('\n')
		if err != nil && err != io.EOF {
			return nil, 0, err
		}

		// ReadBytes hands back whatever it read before hitting io.EOF, so an
		// unterminated last line arrives together with the EOF and must be
		// counted and stored before the loop ends.
		count += len(line)
		if len(line) > 0 && line[0] != '>' { // skip label/comment lines
			if n := len(line); line[n-1] == '\n' {
				line = line[:n-1]
			}
			buf = append(buf, line...)
		}

		if err == io.EOF {
			return buf, count, nil
		}
	}
}
// countMatches reports how many successive non-overlapping matches of rx
// occur in bytes.
func countMatches(rx *regexp.Regexp, bytes []byte) int {
	hits := rx.FindAllIndex(bytes, -1)
	return len(hits)
}
func main() { | |
runtime.GOMAXPROCS(runtime.NumCPU()) | |
bytes, ilen, err := ReadAll(os.Stdin) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "can't read input: %s\n", err) | |
os.Exit(2) | |
} | |
clen := len(bytes) | |
mresults := make([]chan int, len(variants)) | |
for i, r := range variants { | |
ch := make(chan int) | |
mresults[i] = ch | |
go func(intch chan int, rr *regexp.Regexp) { | |
intch <- countMatches(rr, bytes) | |
}(ch, r) | |
} | |
lenresult := make(chan int, 1) | |
bb := append([]byte{}, bytes...) // clone bytes | |
go func() { | |
for _, sub := range substs { | |
bb = sub.rgx.ReplaceAll(bb, sub.repl) | |
} | |
lenresult <- len(bb) | |
}() | |
for i, s := range variants { | |
fmt.Printf("%s %d\n", s, <-mresults[i]) | |
} | |
fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, <-lenresult) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment