Skip to content

Instantly share code, notes, and snippets.

@akr4
Forked from seratch/gist:1297691
Created October 19, 2011 10:57
Show Gist options
  • Save akr4/1297970 to your computer and use it in GitHub Desktop.
Save akr4/1297970 to your computer and use it in GitHub Desktop.
#daimonscala 19-2 "Apache access_log(combined) parser"
/** Parser for Apache `access_log` files written in the "combined" log format.
  *
  * Each record is expected to look like
  * {{{
  * 66.249.69.220 - - [03/Oct/2011:01:22:54 +0900] "GET /blog/23/ HTTP/1.1" 200 22716 "-" "agent"
  * }}}
  * Use [[LogParser.AccessLogParser.parse]] to turn a whole log into a list of
  * [[LogParser.Access]] records; [[LogParser.main]] runs it on a small built-in sample.
  */
object LogParser {

  import java.net.{InetAddress, URI}
  import org.joda.time.DateTime
  import org.joda.time.format.{DateTimeFormat, DateTimeFormatter}
  import scala.util.Try
  import scala.util.parsing.combinator._

  /** One parsed record of the access log.
    *
    * @param ipAddress client address (first field, dotted-quad IPv4)
    * @param ident     second field as logged (`-` when absent)
    * @param user      third field as logged (`-` when absent)
    * @param time      timestamp taken from the bracketed field
    * @param method    request method, e.g. `GET`
    * @param uri       request target
    * @param version   protocol token, e.g. `HTTP/1.1`
    * @param status    response status code
    * @param bytes     response size; a logged `-` is stored as `0`
    * @param referrer  quoted referrer field, verbatim (escape sequences are kept as logged)
    * @param userAgent quoted user-agent field, verbatim (escape sequences are kept as logged)
    */
  case class Access(
    ipAddress: InetAddress,
    ident: String,
    user: String,
    time: DateTime,
    method: String,
    uri: URI,
    version: String,
    status: Int,
    bytes: Int,
    referrer: String,
    userAgent: String
  )

  /** Combinator grammar for the combined log format, one parser per field. */
  object AccessLogParser extends RegexParsers {

    // Line breaks separate records, so only blanks and tabs may be skipped between tokens.
    override val whiteSpace = """[ \t]+""".r

    /** Timestamp layout used inside the square brackets, with English month names. */
    val timeFormat: DateTimeFormatter =
      DateTimeFormat.forPattern("dd/MMM/yyyy:HH:mm:ss Z").withLocale(java.util.Locale.US)

    /** Applies `f` to the token matched by `token`; if `f` throws, the result is a parse
      * failure instead of an exception escaping from the parser.
      */
    private def converted[A](token: Parser[String], what: String)(f: String => A): Parser[A] =
      token >> { s =>
        Try(f(s)).map(v => success(v)).getOrElse(failure(s"invalid $what: $s"))
      }

    /** Record terminator: a line break, or end of input so the last record needs no newline. */
    def eol: Parser[Any] = """\r?\n|\z""".r

    /** Zero or more records. */
    def lines: Parser[List[Access]] = rep(line)

    /** A single record: all eleven fields in order, followed by [[eol]]. */
    def line: Parser[Access] =
      ipAddress ~ ident ~ user ~ time ~ method ~ uri ~ version ~ status ~ bytes ~ referrer ~
        userAgent <~ eol ^^ {
          case ip ~ id ~ usr ~ at ~ verb ~ target ~ proto ~ code ~ size ~ ref ~ agent =>
            Access(ip, id, usr, at, verb, target, proto, code, size, ref, agent)
        }

    /** Dotted-quad IPv4 address. Octets are range-checked before `InetAddress.getByName`
      * is called, so only address literals are ever passed to it.
      */
    def ipAddress: Parser[InetAddress] =
      converted("""[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}""".r, "IPv4 address") { s =>
        require(s.split('.').forall(_.toInt <= 255), "octet out of range")
        InetAddress.getByName(s)
      }

    /** Second field: any run of non-blank characters (typically `-`). */
    def ident: Parser[String] = """\S+""".r

    /** Third field: any run of non-blank characters (typically `-` or a user name). */
    def user: Parser[String] = """\S+""".r

    /** Bracketed timestamp, parsed with [[timeFormat]]. */
    def time: Parser[DateTime] =
      converted("[" ~> """\S+ [^ \]]+""".r <~ "]", "timestamp")(timeFormat.parseDateTime)

    /** Opening quote of the request field followed by the upper-case method name. */
    def method: Parser[String] = "\"" ~> """[A-Z]+""".r

    /** Request target; a token that is not a valid URI is a parse failure. */
    def uri: Parser[URI] = converted("""\S+""".r, "URI")(new URI(_))

    /** Protocol token followed by the closing quote of the request field. */
    def version: Parser[String] = """[^ "]+""".r <~ "\""

    /** Numeric status code. */
    def status: Parser[Int] = converted("""\d+""".r, "status")(_.toInt)

    /** Response size in bytes; `-` is mapped to `0`. */
    def bytes: Parser[Int] =
      converted("""\S+""".r, "byte count") {
        case "-" => 0
        case s   => s.toInt
      }

    /** Body of a double-quoted field. May be empty, and a backslash escapes the next
      * character so that an escaped quote (`\"`) does not end the field. The text is
      * returned as logged, without unescaping.
      */
    private def quoted: Parser[String] = "\"" ~> """(?:[^"\\]|\\.)*""".r <~ "\""

    /** Quoted referrer field. */
    def referrer: Parser[String] = quoted

    /** Quoted user-agent field. */
    def userAgent: Parser[String] = quoted

    /** Parses a complete access log.
      *
      * @param json the log text (the parameter name is historical; the input is not JSON)
      * @return the parsed records on success, otherwise a failure describing where parsing stopped
      */
    def parse(json: String): ParseResult[List[Access]] = parseAll(lines, json)
  }

  /** Parses a built-in three-line sample and prints the records, or the parse failure
    * on standard error.
    */
  def main(args: Array[String]): Unit = {
    val accessLog =
      """66.249.69.220 - - [03/Oct/2011:01:22:54 +0900] "GET /blog/23/ HTTP/1.1" 200 22716 "-" "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)"
        |64.233.172.34 - - [16/Oct/2011:04:03:27 +0900] "GET /api/search/?format=atom&q=testtest HTTP/1.1" 200 20 "-" "Rome Client (http://tinyurl.com/64t5n) Ver: UNKNOWN AppEngine-Google; (+http://code.google.com/appengine; appid: xxxxx)"
        |64.233.172.36 - - [19/Oct/2011:05:18:52 +0900] "GET / HTTP/1.1" 304 - "http://twitter.com/" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1"
        |""".stripMargin

    AccessLogParser.parse(accessLog) match {
      case AccessLogParser.Success(accesses, _) => println(accesses)
      case noSuccess                            => Console.err.println(noSuccess)
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment