Skip to content

Instantly share code, notes, and snippets.

@jkoelker
Created April 27, 2013 19:37
Show Gist options
  • Save jkoelker/5474392 to your computer and use it in GitHub Desktop.
Save jkoelker/5474392 to your computer and use it in GitHub Desktop.
created by github.com/tr3buchet/gister
#!/usr/bin/env python
import re
import sys
# NOTE(jkoelker) Yea its regex, I hate it too
# Matcher for one log entry. Every field is a named group and consecutive
# fields are separated by exactly one space:
#   host -unknown- date "method page protocol" code bytes "referer" "useragent"
# All groups except ``code`` are lazy, so each one stops at the first place
# where the following literal separator can match.
pattern = re.compile(
    r'(?P<host>.*?)'
    r' -(?P<unknown>.*?)-'
    r' (?P<date>.*?)'
    r' "(?P<method>.*?)'
    r' (?P<page>.*?)'
    r' (?P<protocol>.*?)"'
    r' (?P<code>\d*)'
    r' (?P<bytes>.*?)'
    r' "(?P<referer>.*?)"'
    r' "(?P<useragent>.*?)"'
)
# Matches the 8-4-4-4-12 dash-separated textual UUID layout, lowercase hex
# digits only. The pattern is unanchored, so it also finds a UUID embedded
# inside a longer string.
uuid = re.compile(
    r'-'.join(r'[a-f0-9]{%d}' % width for width in (8, 4, 4, 4, 12))
)
def _scrub_page(page):
    """Return *page* cut down to its trailing path with UUIDs masked.

    The page is split on '/', a fixed number of leading components is
    discarded, every UUID in the remaining components is replaced by the
    literal ``<UUID>``, and the result is re-joined with a leading '/'.
    """
    page = page.strip()
    # Pages starting with 'https' lose five leading components, everything
    # else loses three.
    # NOTE(review): presumably this strips scheme/host (and further prefix
    # segments) specific to the logs this was written for -- confirm.
    index = 5 if page.startswith('https') else 3
    parts = [uuid.sub('<UUID>', part) for part in page.split('/')[index:]]
    page = '/'.join(parts)
    if not page.startswith('/'):
        page = '/' + page
    return page


def main():
    """Filter log lines from stdin to stdout, masking UUIDs in the page.

    For each input line the first four whitespace-separated tokens are
    dropped, the remainder is parsed with ``pattern``, the ``page`` field is
    rewritten by ``_scrub_page`` and the entry is printed again with the
    ``unknown`` field replaced by a plain ``- -``.  Lines that do not parse
    are skipped silently.
    """
    for line in sys.stdin:
        # Collapse runs of whitespace and discard the first four tokens.
        # NOTE(review): presumably a syslog-style prefix -- confirm.
        line = ' '.join(line.split()[4:])
        m = pattern.match(line)
        if m is None:
            # Best-effort filter: unparseable lines are dropped, not fatal.
            continue
        res = m.groupdict()
        res['page'] = _scrub_page(res['page'])
        args = (res['host'], res['date'], res['method'], res['page'],
                res['protocol'], res['code'], res['bytes'],
                res['referer'], res['useragent'])
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s - - %s "%s %s %s" %s %s "%s" "%s"' % args)
# Script entry point: run the filter only when executed directly, and pass
# main()'s return value through as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment