Skip to content

Instantly share code, notes, and snippets.

@davisp
Created October 25, 2013 20:57
Show Gist options
  • Save davisp/7161716 to your computer and use it in GitHub Desktop.
Save davisp/7161716 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import contextlib as ctx
import gzip
import re
import sys
def reconstruct(fname):
    """Group the payload lines of a gzipped log by (source, target) pair.

    The log is read line by line. A line starting with ``#`` marks the start
    of a record; the line right after it is treated as the record header and
    is expected to split on whitespace into at least four fields with ``->``
    as the third one (``<field0> <src> -> <tgt> ...``). Every following line,
    up to the next ``#`` line, is stored as payload of that ``(src, tgt)``
    pair.

    Args:
        fname: Path to the gzip-compressed log file.

    Returns:
        A dict mapping ``(src, tgt)`` tuples to lists of ``(index, line)``
        tuples, where ``index`` is the zero-based line number in the file and
        ``line`` is the raw line text including its line terminator.
    """
    ret = {}
    with ctx.closing(gzip.open(fname, 'rb')) as handle:
        hdr = False
        src, tgt = (None, None)
        for idx, raw in enumerate(handle):
            # gzip yields native str on Python 2 but bytes on Python 3.
            # Normalise to native str so the comparisons below (and any
            # later "".join on the lines) behave the same on both; invalid
            # byte sequences are replaced rather than aborting the parse.
            if isinstance(raw, str):
                line = raw
            else:
                line = raw.decode('utf-8', 'replace')
            if line.startswith('#'):
                hdr = True
                continue
            if hdr:
                hdr = False
                # Terrible hack for header detection
                bits = line.split()
                if len(bits) < 4 or bits[2] != '->':
                    # Not a header: drop this line and keep attributing
                    # what follows to the previously seen (src, tgt).
                    continue
                src, tgt = bits[1], bits[3]
                ret.setdefault((src, tgt), [])
            elif src is not None:
                # Lines seen before the first valid header are ignored.
                ret[(src, tgt)].append((idx, line))
    return ret
def conversations(data):
    """Merge both directions of each two-way exchange into a single text.

    Args:
        data: Dict mapping ``(src, tgt)`` tuples to lists of
            ``(index, line)`` tuples, as returned by ``reconstruct``.

    Returns:
        A dict with one entry per pair for which both ``(s, t)`` and
        ``(t, s)`` are present in ``data``, keyed by the ordering with
        ``s < t``. Each value is the concatenation of the lines from both
        directions, sorted by their ``(index, line)`` tuples. Pairs seen in
        only one direction, and pairs with ``s == t``, are omitted.
    """
    ret = {}
    for (s, t) in sorted(data):
        # Keep a single canonical key per pair and require the reverse
        # direction to exist as well.
        if not s < t or (t, s) not in data:
            continue
        # Build a new list: extending data[(s, t)] in place would silently
        # corrupt the caller's input.
        lines = sorted(data[(s, t)] + data[(t, s)])
        ret[(s, t)] = "".join(entry[1] for entry in lines)
    return ret
def main():
    """Command-line entry point: print every two-way conversation in a log.

    Expects exactly one argument, the path to a gzipped log file. For each
    conversation a ``## <src> <tgt>`` heading is written to stdout followed
    by the merged conversation text and a trailing newline. With a wrong
    argument count a usage message is written to stderr and the process
    exits with status 1.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("usage: %s logfile.gz\n" % sys.argv[0])
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)
    data = reconstruct(sys.argv[1])
    convs = conversations(data)
    # Sorted so the output order does not depend on dict ordering.
    for (s, t) in sorted(convs):
        sys.stdout.write("## %s %s\n" % (s, t))
        sys.stdout.write(convs[(s, t)] + "\n")
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment