Skip to content

Instantly share code, notes, and snippets.

@wangzaixiang
Created December 10, 2019 02:35
Show Gist options
  • Save wangzaixiang/2501c16f7b09d6ef5b1e5e9efe827bac to your computer and use it in GitHub Desktop.
Save wangzaixiang/2501c16f7b09d6ef5b1e5e9efe827bac to your computer and use it in GitHub Desktop.
A simple comparison of HyperScan and Java's Pattern. It looks like the Java Pattern is faster than HS. What is the problem?
import java.util
import java.util.regex.Pattern
import com.gliwka.hyperscan.wrapper._
/** Micro-benchmark: matches one fixed log line against one regular expression
  * `LOOP` times and prints the total elapsed time.
  *
  * `main1` runs the loop through the Hyperscan wrapper, `main2` through
  * `java.util.regex`. `main` selects which one runs by commenting one call out.
  */
object Main {

  /** Log-line pattern under test.
    *
    * NOTE(review): the `.` between the seconds (`\d{2}`) and the milliseconds
    * (`\d{3}`) is unescaped, so it accepts any character there, not only a
    * literal dot. Left as-is so the benchmarked pattern is unchanged.
    */
  val regexp: String = """(?s)\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3})\] \[([^\[\]]+)\] \[([^\[\]]+)\] \[([^\[\]]+)\] - \[(traceId:|t:)(.*) (spanId:|s:)(-?[A-Fa-f0-9]*) (pSpanId:|p:)(-?[A-Fa-f0-9]*)] (.*)$"""

  /** Sample input that `regexp` is expected to match. */
  val line: String = "[2019-12-09 14:39:29.298] [INFO] [Logging-Thread-1] [com.yunji.scs.appprice.service.PriceApplicationForAppServiceImpl] - [t: s: p:] log msg"

  /** Number of match attempts performed by each benchmark. */
  val LOOP: Int = 10_000_000

  /** Entry point; runs the benchmark that is not commented out.
    *
    * @param args command-line arguments, forwarded unchanged to the benchmark
    */
  def main(args: Array[String]): Unit = {
    // main1(args)
    main2(args)
  }

  /** Hyperscan benchmark: compiles `regexp` once, then scans `line` `LOOP` times.
    *
    * @param args unused
    */
  def main1(args: Array[String]): Unit = {
    // One-time setup, deliberately kept outside the timed region.
    val expr = new Expression(regexp, util.EnumSet.of(ExpressionFlag.DOTALL))
    val db = Database.compile(expr)
    val scanner = new Scanner
    scanner.allocScratch(db)

    var remaining = LOOP
    // nanoTime is monotonic, so the interval is immune to wall-clock adjustments.
    val startNanos = System.nanoTime()
    while (remaining > 0) {
      // The scan stays outside assert() so it still runs if assertions are elided.
      // NOTE(review): every call returns a fresh collection of matches; that per-call
      // cost is part of what this loop measures -- confirm against the wrapper's docs.
      val matches = scanner.scan(db, line)
      assert(matches.size() == 1)
      remaining -= 1
    }
    val elapsedMs = (System.nanoTime() - startNanos) / 1_000_000L
    println(s"using HyperScan: total time:${elapsedMs}ms /$LOOP")
  }

  /** `java.util.regex` benchmark: compiles `regexp` once, then matches `line` `LOOP` times.
    *
    * @param args unused
    */
  def main2(args: Array[String]): Unit = {
    // Compile once, outside the timed loop, mirroring the one-time database
    // compilation in main1 so both benchmarks time matching only.
    val pattern = Pattern.compile(regexp, Pattern.DOTALL)

    var remaining = LOOP
    // nanoTime is monotonic, so the interval is immune to wall-clock adjustments.
    val startNanos = System.nanoTime()
    while (remaining > 0) {
      val matcher = pattern.matcher(line)
      // Evaluate the match outside assert(): Predef.assert is elidable, and an
      // elided call would otherwise remove the very work being measured.
      val matched = matcher.matches()
      assert(matched)
      remaining -= 1
    }
    val elapsedMs = (System.nanoTime() - startNanos) / 1_000_000L
    println(s"using JavaPattern total time:${elapsedMs}ms/ $LOOP")
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment