Skip to content

Instantly share code, notes, and snippets.

@meyarivan
Created November 4, 2014 15:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meyarivan/a36bf3b440f653853dac to your computer and use it in GitHub Desktop.
Save meyarivan/a36bf3b440f653853dac to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
import sys
# Module-level tally filled in by parse_line(): maps a 7-tuple of strings
# taken from the request URL's path segments to the number of input lines
# that produced that tuple.
grouped = {}
def parse_line(linex):
    """Tally one input line into the module-level ``grouped`` dict.

    The line is split on single spaces into at most ten tokens. Nothing is
    counted when token 1 is ``'-'``, token 8 is not ``'200'``, or token 6
    (the request URL) does not start with ``'/blocklist/3'``.

    Otherwise the request URL is split on ``'/'``; segment 10 is further
    split on ``'%20'`` and its first two pieces, together with segments
    3, 4, 5, 6 and 9, form the 7-tuple key whose count is incremented.

    Malformed lines are not handled here: too few tokens or path segments
    raise IndexError, and a segment 10 with fewer than two ``'%20'``
    separated pieces raises ValueError.
    """
    tokens = linex.split(' ', 9)

    # Guard clauses, checked in this order so that short lines fail (or are
    # skipped) at the same token as a single chained ``or`` would.
    if tokens[1] == '-':
        return
    if tokens[8] != '200':
        return
    request_url = tokens[6]
    if not request_url.startswith('/blocklist/3'):
        return

    segments = request_url.split('/')
    pieces = segments[10].split('%20')
    first, second = pieces[:2]

    key = (
        segments[5],
        first,
        second,
        segments[4],
        segments[6],
        segments[9],
        segments[3],
    )
    grouped[key] = grouped.get(key, 0) + 1
def main():
    """Feed every line of standard input to ``parse_line``.

    Returns:
        A ``(nlines, errors)`` tuple: the number of lines read from stdin
        and the number of those lines for which ``parse_line`` raised.
    """
    errors = 0
    nlines = 0
    for line in sys.stdin:
        nlines += 1
        try:
            parse_line(line)
        except Exception:
            # Best effort: a line that cannot be parsed is counted and
            # skipped. Catching Exception (not a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate instead of being
            # miscounted as parse errors.
            errors += 1
    return nlines, errors
if __name__ == '__main__':
    # Process stdin, then report a one-line summary on stderr followed by
    # one "<key fields> <count>" row per group on stdout.
    nlines, errors = main()

    summary = 'ngroups %d nerrors %d nlines %d' % (len(grouped), errors, nlines)
    # Explicit write() calls instead of the Python-2-only print statement:
    # the same text is emitted, and the syntax is also valid on Python 3.
    sys.stderr.write(summary + '\n')

    for key in grouped:
        # The key fields are space-joined and the count is always separated
        # from them by exactly one space.
        sys.stdout.write('%s %s\n' % (' '.join(key), grouped[key]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment