Skip to content

Instantly share code, notes, and snippets.

@igorcoding
Created January 29, 2014 16:03
Show Gist options
  • Save igorcoding/8691140 to your computer and use it in GitHub Desktop.
Save igorcoding/8691140 to your computer and use it in GitHub Desktop.
Apache access log file regex parsing
import re
from pprint import pprint
# Request line layout: "<method> <url> <protocol>".  The method and protocol
# are single tokens; the URL takes everything in between, so a URL that
# contains a space is not split across the method/url groups.
_REQUEST_LINE_RE = re.compile(
    r"""(?P<method>\S+)\s      # method, space
        (?P<url>.*)\s          # url, space
        (?P<protocol>\S+)      # protocol
    """,
    re.VERBOSE,
)


def parse_request(request_str):
    """Split an HTTP request line into its method, URL and protocol.

    Args:
        request_str: The request portion of a log entry, for example
            ``"GET /index.html HTTP/1.1"``.

    Returns:
        A dict with the keys ``'method'``, ``'url'``, ``'protocol'`` and
        ``'__raw__'``.  ``'__raw__'`` always holds ``request_str`` unchanged.
        When ``request_str`` does not have the three-part layout (for
        example a bare ``"-"``), the three parsed fields are ``None``
        instead of the call failing on a missing match.
    """
    match = _REQUEST_LINE_RE.match(request_str)
    if match is None:
        # Unparseable request: keep the raw text, leave the parts empty.
        method = url = protocol = None
    else:
        method = match.group("method")
        url = match.group("url")
        protocol = match.group("protocol")
    return {
        'method': method,
        'url': url,
        'protocol': protocol,
        '__raw__': request_str,
    }
# One access-log entry:
#   ip client username [date] "request" status size
# Fields are bounded so they cannot absorb their neighbours: the date stops
# at the first "]", and the size is a single token, so anything after it on
# the line (e.g. referer / user-agent columns) is ignored.
_ACCESS_LOG_RE = re.compile(
    r"""(?P<ip>(\d{1,3}\.){3}\d{1,3})\s    # ip, space
        (?P<client>\S+)\s                   # client, space
        (?P<username>.*?)\s                 # username, space
        \[(?P<date>[^\]]*)\]\s              # date, space
        "(?P<request>.*)"\s                 # request, space
        (?P<response>\d{3})\s               # response code
        (?P<ret>\S*)                        # return size
    """,
    re.VERBOSE,
)


def parse(s):
    """Parse one access-log line into a dict of its fields.

    Args:
        s: A single log line, for example
            ``'10.0.0.1 - - [15/Jul/2009:15:50:35 -0700] "GET / HTTP/1.1" 200 10469'``.

    Returns:
        A dict with the keys ``'ip'``, ``'client'``, ``'username'``,
        ``'date'``, ``'request'``, ``'response'`` and ``'return_size'``.
        ``'request'`` is the dict produced by ``parse_request``; every other
        value is the matched text as a string (nothing is converted to
        numbers or dates).

    Raises:
        ValueError: If ``s`` does not match the expected layout.
    """
    match = _ACCESS_LOG_RE.match(s)
    if match is None:
        # Fail with the offending line rather than an AttributeError on None.
        raise ValueError("not a recognised access log line: %r" % (s,))
    log_entry = {
        'ip': match.group("ip"),
        'client': match.group("client"),
        'username': match.group("username"),
        'date': match.group("date"),
        'request': parse_request(match.group("request")),
        'response': match.group("response"),
        'return_size': match.group("ret"),
    }
    return log_entry
if __name__ == "__main__":
    # Demo: parse one sample access-log line and pretty-print the result.
    sample_line = (
        '10.223.157.186 - - [15/Jul/2009:15:50:35 -0700] '
        '"GET /assets/js/lowpro.js HTTP/1.1" 200 10469'
    )
    entry = parse(sample_line)
    pprint(entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment