Skip to content

Instantly share code, notes, and snippets.

@tomotaka
Created July 12, 2013 04:11
Show Gist options
  • Save tomotaka/5981354 to your computer and use it in GitHub Desktop.
Save tomotaka/5981354 to your computer and use it in GitHub Desktop.
parser for fluentd-s3 log
#!/usr/bin/python
# -*- coding: utf-8 -*-
import simplejson
import re
import pprint
import iso8601
# Public API of this module: the record iterator class and the single-line
# parser function, both defined below.
__all__ = ('FluentLogS3', 'parse_fluent_log_line')
# One newline-terminated log line: a timestamp with a numeric UTC offset
# (e.g. 2013-07-12T04:11:00+09:00), a whitespace-free tag, then the rest of
# the line as the record payload.
_parse_pat = re.compile(r'\A(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d[\-+]\d\d:\d\d)\s+([^\s]+)\s+(.*)\n\Z')


def parse_fluent_log_line(line):
    """Parse one line of a fluentd-s3 log into ``(timestamp, tag, content)``.

    Args:
        line: A single log line.  The trailing newline may be missing (as
            happens for the last line of a file without a final line
            terminator).  Byte strings, as returned by binary file objects
            on Python 3, are decoded as UTF-8 before parsing.

    Returns:
        A tuple ``(timestamp, tag, content)`` where ``timestamp`` is the
        result of ``iso8601.parse_date`` on the first field, ``tag`` is the
        second field, and ``content`` is the third field decoded with
        ``simplejson.loads``.  ``None`` is returned when the line is empty
        or does not have the expected three-field layout.

    Raises:
        UnicodeDecodeError: If ``line`` is a byte string that is not valid
            UTF-8.
        Exception: Whatever ``iso8601.parse_date`` or ``simplejson.loads``
            raise for a timestamp or payload they cannot decode; these are
            deliberately not swallowed here.
    """
    # On Python 2 ``bytes`` is ``str``, so this branch is only taken on
    # Python 3, where matching a text pattern against bytes would raise.
    if isinstance(line, bytes) and not isinstance(line, str):
        line = line.decode('utf-8')

    if not line:
        return None

    # The pattern insists on a trailing newline; add one when it is missing
    # so an unterminated final line is parsed instead of silently dropped.
    if not line.endswith('\n'):
        line += '\n'

    matching = _parse_pat.match(line)
    if matching is None:
        return None

    timestamp, tag, content = matching.groups()
    return (iso8601.parse_date(timestamp), tag, simplejson.loads(content))
class FluentLogS3(object):
    """Iterator over the records of a fluentd-s3 log.

    Wraps a file-like object and, on each step, reads one line from it and
    returns the ``(timestamp, tag, content)`` tuple produced by
    ``parse_fluent_log_line`` for that line.

    NOTE: iteration ends at the first line for which
    ``parse_fluent_log_line`` returns ``None``.  That covers the empty
    string ``readline()`` returns at end of file, but also any line that
    does not match the expected layout; lines after such a line are never
    read.
    """

    def __init__(self, fobj):
        """Remember *fobj*, a file-like object providing ``readline()``."""
        self.fobj = fobj

    def __iter__(self):
        """Return ``self``; the object is its own iterator."""
        return self

    def next(self):
        """Read and parse the next line.

        Returns:
            The ``(timestamp, tag, content)`` tuple for the line.

        Raises:
            StopIteration: When the line read cannot be parsed (including
                the empty string read at end of file).
        """
        line = self.fobj.readline()
        result = parse_fluent_log_line(line)
        if result is None:
            raise StopIteration
        return result

    # Python 3 looks up ``__next__`` rather than ``next``; expose both so
    # the class works in a ``for`` loop on either major version.
    __next__ = next
if __name__ == '__main__':
    # Example driver: walk every record of a gzip-compressed log file.
    from gzip import GzipFile

    # The ``with`` block closes the archive even when parsing a record
    # raises part-way through the file.
    with GzipFile('./joined.gz', 'r') as gf:
        for ts, tag, content in FluentLogS3(gf):
            # analyze content here!
            pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment