-
-
Save BurntSushi/9d35258444fda83de208d31fdd5e9e69 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* The Computer Language Benchmarks Game
 * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 *
 * regex-redux program contributed by BurntSushi.
 */
package main | |
import ( | |
"fmt" | |
"io/ioutil" | |
"os" | |
"runtime" | |
"github.com/BurntSushi/rure-go" | |
) | |
// variants holds the patterns whose match counts main reports, one line per
// pattern, in the order listed here.
var variants = []string{
	`agggtaaa|tttaccct`,
	`[cgt]gggtaaa|tttaccc[acg]`,
	`a[act]ggtaaa|tttacc[agt]t`,
	`ag[act]gtaaa|tttac[agt]ct`,
	`agg[act]taaa|ttta[agt]cct`,
	`aggg[acg]aaa|ttt[cgt]ccct`,
	`agggt[cgt]aa|tt[acg]accct`,
	`agggta[cgt]a|t[acg]taccct`,
	`agggtaa[cgt]|[acg]ttaccct`,
}
// Subst pairs a regular expression with the text that replaces each of its
// matches.
type Subst struct {
	pat  string // pattern to search for
	repl string // replacement written in place of every match
}
var substs = []Subst{ | |
Subst{"tHa[Nt]", "<4>"}, | |
Subst{"aND|caN|Ha[DS]|WaS", "<3>"}, | |
Subst{"a[NSt]|BY", "<2>"}, | |
Subst{"<[^>]*>", "|"}, | |
Subst{"\\|[^|][^|]*\\|", "-"}, | |
} | |
func countMatches(pat string, bytes []byte) int { | |
n := 0 | |
for it := rure.MustCompile(pat).IterBytes(bytes); it.Next(nil); { | |
n++ | |
} | |
return n | |
} | |
func replaceAll(re *rure.Regex, src []byte, repl []byte) []byte { | |
dst := make([]byte, 0, len(src)) | |
lastMatch := 0 | |
for it := re.IterBytes(src); it.Next(nil); { | |
start, end := it.Match() | |
dst = append(dst, src[lastMatch:start]...) | |
lastMatch = end | |
dst = append(dst, repl...) | |
} | |
return append(dst, src[lastMatch:]...) | |
} | |
func main() { | |
runtime.GOMAXPROCS(runtime.NumCPU()) | |
bytes, err := ioutil.ReadFile("/dev/stdin") | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "can't read input: %s\n", err) | |
os.Exit(2) | |
} | |
ilen := len(bytes) | |
// Delete the comment lines and newlines | |
bytes = replaceAll(rure.MustCompile("(>[^\n]+)?\n"), bytes, []byte{}) | |
clen := len(bytes) | |
mresults := make([]chan int, len(variants)) | |
for i, s := range variants { | |
ch := make(chan int) | |
mresults[i] = ch | |
go func(ss string) { | |
ch <- countMatches(ss, bytes) | |
}(s) | |
} | |
lenresult := make(chan int) | |
bb := bytes | |
go func() { | |
for _, sub := range substs { | |
bb = replaceAll(rure.MustCompile(sub.pat), bb, []byte(sub.repl)) | |
} | |
lenresult <- len(bb) | |
}() | |
for i, s := range variants { | |
fmt.Printf("%s %d\n", s, <-mresults[i]) | |
} | |
fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, <-lenresult) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[andrew@Cheetah benchgame] time bench-native < /tmp/input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361
real 0m20.709s
user 1m19.839s
sys 0m0.183s
[andrew@Cheetah benchgame] time bench-pcre < /tmp/input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361
real 0m10.510s
user 0m35.362s
sys 0m0.211s
[andrew@Cheetah benchgame] time bench-rust < /tmp/input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361
real 0m2.235s
user 0m3.410s
sys 0m0.157s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment