Skip to content

Instantly share code, notes, and snippets.

@jayswan
Created September 13, 2014 02:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jayswan/e0a97681ef8f66f367f4 to your computer and use it in GitHub Desktop.
Save jayswan/e0a97681ef8f66f367f4 to your computer and use it in GitHub Desktop.
from collections import Counter
from csv import DictReader
import gzip
from pprint import pprint
from sys import argv
# Column names handed to DictReader as `fieldnames`, in on-disk column order.
# NOTE(review): the names match the Bro dns.log schema -- confirm against the
# Bro version that produced the logs being profiled.
# 'answers' and 'TTLs' are two separate columns; when fused into a single
# 'answersTTLs' entry every later value was shifted one column to the left.
FIELDNAMES = [
    'ts',
    'uid',
    'id.orig_h',
    'id.orig_p',
    'id.resp_h',
    'id.resp_p',
    'proto',
    'trans_id',
    'query',
    'qclass',
    'qclass_name',
    'qtype',
    'qtype_name',
    'rcode',
    'rcode_name',
    'AA',
    'TC',
    'RD',
    'RA',
    'Z',
    'answers',
    'TTLs',
    'rejected',
]
def ingest(files, delim='\t', qchar='"'):
    """Load delimited text files into a list of per-line dictionaries.

    Args:
        files: iterable of file paths. A single path string is also accepted.
            Paths ending in '.gz' are opened with gzip, all others with open.
        delim: field delimiter passed to DictReader.
        qchar: quote character passed to DictReader.

    Returns:
        A list with one dict per input line, keyed by FIELDNAMES, in the
        order the files were given.
    """
    if isinstance(files, str):
        # A bare path would otherwise be iterated character by character.
        files = [files]
    records = []
    for fn in files:
        # Single-argument call form prints the same text as the statement form.
        print('loading %s' % fn)
        opener = gzip.open if fn.endswith('.gz') else open
        # The reader is lazy, so rows must be consumed before the handle closes.
        with opener(fn) as handle:
            reader = DictReader(handle, delimiter=delim, quotechar=qchar,
                                fieldnames=FIELDNAMES)
            records.extend(reader)
    return records
def count(records, field):
    """Tally the values of `field` across `records`, 'uniq -c' style.

    Returns a list of (value, occurrences) tuples ordered from most to
    least common, as produced by Counter.most_common().
    """
    tally = Counter()
    for row in records:
        tally[row[field]] += 1
    return tally.most_common()
def uniq(records, field):
    """Collect the distinct values of `field` across `records`, 'uniq' style.

    Returns a set, so the result carries no ordering.
    """
    distinct = set()
    for row in records:
        distinct.add(row[field])
    return distinct
def linecount(records):
    """Return how many records were loaded."""
    total = len(records)
    return total
def keys(records):
    """Return the sorted field names of the first record.

    Only the first record is inspected. An empty `records` yields an empty
    list instead of raising IndexError.
    """
    if not records:
        return []
    return sorted(records[0])
def profile(records):
    """Print a short summary of `records` to stdout.

    The summary is the record count followed by the pretty-printed list of
    field names reported by keys().
    """
    # Single-argument call forms print the same text as the statement forms
    # they replace, and stay valid where `print` is a function.
    print('Number of records:')
    print(linecount(records))
    print('')  # blank separator line
    print('fields:')
    pprint(keys(records))
def main():
    """Profile the log files named on the command line.

    Every argument after the script name is treated as a file path. Exits
    with a usage message when no paths are given.
    """
    # argv[1] alone is a single string, which ingest() would walk character
    # by character; the slice passes a list of paths instead.
    paths = argv[1:]
    if not paths:
        raise SystemExit('usage: %s FILE [FILE ...]' % argv[0])
    profile(ingest(paths))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment