Skip to content

Instantly share code, notes, and snippets.

@ericjster
Last active February 10, 2023 02:23
Show Gist options
  • Save ericjster/3acadaa422dd889df18846d4d9e7ae86 to your computer and use it in GitHub Desktop.
Save ericjster/3acadaa422dd889df18846d4d9e7ae86 to your computer and use it in GitHub Desktop.
Golang regexp timing
package main
// Looking at timing of regex.
// Regex example from Damian Gryski's article:
// https://medium.com/@dgryski/speeding-up-regexp-matching-with-ragel-4727f1c16027
//
// Let's measure a hand-coded regex and compare with the regex library and ragel
// state machine. In this case, the hand-coded version is less than 10% of the
// cost. Obviously 1 line of code is more understandable and maintainable than
// 50 lines with hardcoded constants, but it might be useful to know what could
// be achieved.
// go test -test.bench=.
// goos: darwin
// goarch: amd64
// BenchmarkRegex-8 10000000 204 ns/op
// BenchmarkHand-8 50000000 22.7 ns/op
// BenchmarkHand2-8 100000000 17.8 ns/op
// PASS
// Loop over optimization flags. -B = disable bounds check.
//
// for opt in "" -gcflags=-B; do echo "opt=$opt"; go test $opt -test.bench=.; done
// opt=
// goos: darwin
// goarch: amd64
// BenchmarkRegex-8 10000000 208 ns/op
// BenchmarkHand-8 100000000 22.1 ns/op
// BenchmarkHand2-8 100000000 18.1 ns/op
// BenchmarkHand3-8 30000000 52.9 ns/op
// PASS
// ok _/Users/eric/tmp/191116_regex_timing 8.025s
// opt=-gcflags=-B
// goos: darwin
// goarch: amd64
// BenchmarkRegex-8 10000000 210 ns/op
// BenchmarkHand-8 100000000 21.9 ns/op
// BenchmarkHand2-8 100000000 19.8 ns/op
// BenchmarkHand3-8 30000000 52.7 ns/op
// PASS
// ok _/Users/eric/tmp/191116_regex_timing 8.202s
import (
"regexp"
"strings"
"testing"
)
// sample input line from the blog post
var data = []byte(`Jan 18 06:41:30 corecompute sshd[42327]: Failed keyboard-interactive/pam for root from 112.100.68.182 port 48803 ssh2`)
var reSSHD = regexp.MustCompile(`sshd\[\d{5}\]:\s*Failed`)
func BenchmarkRegex(b *testing.B) {
var hits int
for i := 0; i < b.N; i++ {
if reSSHD.Match(data) {
hits++
}
}
if hits <= 0 {
b.Fatalf("No hits")
}
}
func BenchmarkHand(b *testing.B) {
var hits int
for i := 0; i < b.N; i++ {
if matchSSHDHand(data) {
hits++
}
}
if hits <= 0 {
b.Fatalf("No hits")
}
}
func BenchmarkHand2(b *testing.B) {
var hits int
for i := 0; i < b.N; i++ {
if matchSSHDHand2(data) {
hits++
}
}
if hits <= 0 {
b.Fatalf("No hits")
}
}
func BenchmarkHand3(b *testing.B) {
var hits int
for i := 0; i < b.N; i++ {
if matchSSHDHand3(data) {
hits++
}
}
if hits <= 0 {
b.Fatalf("No hits")
}
}
func isdigit(b byte) bool {
return '0' <= b && b <= '9'
}
func isspace(b byte) bool {
return b == ' ' || b == '\t'
}
func matchSSHDHand(data []byte) bool {
// Hand code the regex match.
// regex: "sshd\[\d{5}\]:\s*Failed"
n := len(data)
j := -1
_ = data[n-1] // Maybe help with overflow check
for i := 0; i < n-12; i++ {
if data[i+0] == 's' &&
data[i+1] == 's' &&
data[i+2] == 'h' &&
data[i+3] == 'd' &&
data[i+4] == '[' &&
isdigit(data[i+5]) &&
isdigit(data[i+6]) &&
isdigit(data[i+7]) &&
isdigit(data[i+8]) &&
isdigit(data[i+9]) &&
data[i+10] == ']' &&
data[i+11] == ':' {
j = i
break
}
}
if j < 0 {
return false
}
for ; isspace(data[j]) && j < n-6; j++ {
}
for i := j; i < n-6; i++ {
if data[i+0] == 'F' &&
data[i+1] == 'a' &&
data[i+2] == 'i' &&
data[i+3] == 'l' &&
data[i+4] == 'e' &&
data[i+5] == 'd' {
return true
}
}
return false
}
func matchSSHDHand2(data []byte) bool {
// Hand code the regex match.
// regex: "sshd\[\d{5}\]:\s*Failed"
n := len(data)
_ = data[n-1] // Maybe help with overflow check
for i := 0; i < n-12; i++ {
if data[i+0] == 's' &&
data[i+1] == 's' &&
data[i+2] == 'h' &&
data[i+3] == 'd' &&
data[i+4] == '[' &&
isdigit(data[i+5]) &&
isdigit(data[i+6]) &&
isdigit(data[i+7]) &&
isdigit(data[i+8]) &&
isdigit(data[i+9]) &&
data[i+10] == ']' &&
data[i+11] == ':' {
var j int
for j = i + 12; isspace(data[j]) && j < n-6; j++ {
}
for ; j < n-6; j++ {
if data[j+0] == 'F' &&
data[j+1] == 'a' &&
data[j+2] == 'i' &&
data[j+3] == 'l' &&
data[j+4] == 'e' &&
data[j+5] == 'd' {
return true
}
}
}
}
return false
}
func matchSSHDHand3(data []byte) bool {
// Hand code the regex match.
// regex: "sshd\[\d{5}\]:\s*Failed"
sdata := string(data)
n := len(sdata)
_ = sdata[n-1] // Maybe help with overflow check
for {
// Note that strings.Index uses SSE and AVX instructions.
i := strings.Index(sdata, "sshd[")
if i < 0 || i+5+7 >= n {
return false
}
i += 5
if isdigit(sdata[i+0]) &&
isdigit(sdata[i+1]) &&
isdigit(sdata[i+2]) &&
isdigit(sdata[i+3]) &&
isdigit(sdata[i+4]) &&
sdata[i+5] == ']' &&
sdata[i+6] == ':' {
i += 7
for ; isspace(sdata[i]) && i+5+1 < n; i++ {
}
if sdata[i+0] == 'F' &&
sdata[i+1] == 'a' &&
sdata[i+2] == 'i' &&
sdata[i+3] == 'l' &&
sdata[i+4] == 'e' &&
sdata[i+5] == 'd' {
return true
}
}
sdata = sdata[5:]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment