Skip to content

Instantly share code, notes, and snippets.

@shrkw
Created August 29, 2014 22:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shrkw/5e39c59cf0391783fd55 to your computer and use it in GitHub Desktop.
Save shrkw/5e39c59cf0391783fd55 to your computer and use it in GitHub Desktop.
Scalaのパーザコンビネータでアクセスログをパースしてみる ref: http://qiita.com/shrkw/items/e40f04a74840608fe62d
package com.example.service
import scala.io.Source
import scala.util.parsing.combinator.RegexParsers
/**
 * Sample service that parses CloudFlare-style access log lines with a
 * parser-combinator grammar and prints the extracted fields.
 */
object CloudFlareLogParserService {

  /**
   * Parses the embedded sample log one line at a time and prints each parsed
   * record, or the exception when a line is rejected by the parser.
   */
  def resolve: Unit = {
    val accessLog =
      """
        |www.cloudflare.com 1.1.1.1 1383426540 "GET / HTTP/1.1" 200 11022 "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" "http://blog.cloudflare.com/"
        |www.cloudflare.com 1.1.1.1 1383426540 "GET / HTTP/1.1" 200 11022 "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0)" "CLEAN.HUMAN 1383426470.808 off" "http://blog.cloudflare.com/"
        |""".stripMargin

    // Blank lines (such as the one right after the opening quotes) hold no record and are skipped.
    val records = accessLog.split('\n').filter(_.trim.nonEmpty)

    records.foreach { line =>
      try println(CloudFlareLogParser.parseLine(line))
      catch {
        // A rejected line is reported and skipped so the remaining lines are still processed.
        case e: IllegalArgumentException => println(e)
      }
    }
  }

  /**
   * The subset of an access log record that the parser keeps.
   *
   * @param host      requested host name (first field of the line)
   * @param ipAddress client address (second field of the line)
   * @param size      numeric field that follows the status code
   * @param referrer  contents of the last quoted field
   */
  case class CloudFlareAccessLog(host: String, ipAddress: String, size: Int, referrer: String)

  /**
   * Grammar for access log lines of the form
   * `host ip timestamp "request" status size "user agent" "cloudflare info" "referrer"`.
   *
   * Fields are separated by whitespace, which `RegexParsers` skips by default
   * before every literal and regex token.
   */
  object CloudFlareLogParser extends RegexParsers {

    /** Zero or more log lines separated by line breaks. */
    def lines: Parser[List[CloudFlareAccessLog]] = repsep(line, nl)

    /** One complete log line, reduced to the fields kept in [[CloudFlareAccessLog]]. */
    def line: Parser[CloudFlareAccessLog] = host ~ ipAddress ~ p2 ~ p3 ^^ {
      // `toInt` throws NumberFormatException (an IllegalArgumentException) when the
      // digits do not fit into an Int.
      case host ~ ipAddress ~ size ~ referrer =>
        CloudFlareAccessLog(host, ipAddress, size.toInt, referrer)
    }

    /** Line break: `\n`, optionally preceded by `\r`. */
    def nl = opt('\r') <~ '\n'

    /** Timestamp, request and status code are consumed but dropped; only the size is kept. */
    def p2 = logWindow ~> request ~> statusCode ~> size

    /** User agent and CloudFlare info are consumed but dropped; only the referrer is kept. */
    def p3 = userAgent ~> cloudFlareInfo ~> referrer

    // Host names may contain hyphens in addition to word characters and dots.
    def host = """[\w.-]+""".r

    // Accepts dotted IPv4 as well as IPv6 notation (hex digits and colons).
    def ipAddress = """[\da-fA-F.:]+""".r

    def logWindow = """\d+""".r

    def request = quoted("[^\"]*")

    def statusCode = """\d{3}""".r

    def size = """\d+""".r

    def userAgent = quoted("[^\"]*")

    def cloudFlareInfo = quoted("[\\w. ]*")

    def referrer = quoted("[^\"]*")

    /** A double-quoted field whose content matches `pattern`; the quotes are discarded. */
    private def quoted(pattern: String): Parser[String] = "\"" ~> pattern.r <~ "\""

    /**
     * Parses every log line contained in `input`.
     *
     * @param input one or more log lines separated by line breaks
     * @return the parsed records in input order; empty when `input` holds no record
     * @throws IllegalArgumentException if `input` does not match the grammar
     */
    def parseLines(input: String): List[CloudFlareAccessLog] = parseAll(lines, input).getOrElse {
      throw new IllegalArgumentException("Failed to parse: " + input)
    }

    /**
     * Parses `input` and returns its first record. If `input` contains several
     * lines, all of them must be valid but only the first record is returned.
     *
     * @param input a log line
     * @return the first parsed record
     * @throws IllegalArgumentException if `input` does not match the grammar or
     *                                  contains no record at all (empty or blank input)
     */
    def parseLine(input: String): CloudFlareAccessLog =
      // `headOption` instead of `head`: blank input parses to an empty list, which
      // must surface as the same exception type callers already handle.
      parseLines(input).headOption.getOrElse {
        throw new IllegalArgumentException("Failed to parse: " + input)
      }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment