Skip to content

Instantly share code, notes, and snippets.

@bitprophet
Created December 15, 2011 02:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bitprophet/1479512 to your computer and use it in GitHub Desktop.
import csv
import json
import os
import sys
from collections import defaultdict
from dateutil.parser import parse as dateparse
import requests
# CLI arguments: argv[1] is either a path to a logstash output file or an
# elasticsearch base URI; optional argv[2] caps how many lines/hits to read.
target = sys.argv[1]
limit = int(sys.argv[2]) if len(sys.argv) > 2 else None
def go(generator):
    """
    Tally log events into per-second buckets.

    *generator* yields JSON strings, each an object carrying an
    "@timestamp" field. Each timestamp is truncated to whole-second
    resolution and counted. Honors the module-level ``limit``: when it
    is not None, processing stops after that many lines.

    Returns a defaultdict mapping datetime (second resolution) -> count.
    """
    seconds = defaultdict(int)
    for lineno, line in enumerate(generator):
        if limit is not None and lineno >= limit:
            break
        obj = json.loads(line)
        # Normalize to the second so events bucket per-second.
        timestamp = dateparse(obj["@timestamp"]).replace(microsecond=0)
        seconds[timestamp] += 1
        # Emit a progress dot every 10k lines so long runs show life.
        if not (lineno % 10000):
            sys.stderr.write('.')
            sys.stderr.flush()
    return seconds
def query(uri):
    """
    GET *uri* and decode the JSON response body.

    Returns the decoded object (an elasticsearch response dict).
    """
    return json.loads(requests.get(uri).content)
def get_next(uri, from_):
    """
    Fetch one page (up to 1000 hits) of "@timestamp" fields from
    elasticsearch, starting at result offset *from_*.

    Returns the list of hit objects; an empty list means exhaustion.
    """
    # The % formatting binds to the string literal only (tighter than +),
    # so the offset is interpolated into the query string, then appended
    # to the base URI.
    result = query(uri + "/_search?q=*&from=%s&size=1000&fields=@timestamp" % from_)
    return result['hits']['hits']
def elasticsearch(uri):
    """
    Generator yielding one JSON-encoded hit-fields object per document.

    Pages through the elasticsearch index at *uri* 1000 hits at a time
    (matching the page size hard-coded in get_next) and stops when a
    page comes back empty.
    """
    from_ = 0
    chunksize = 1000
    hits = get_next(uri, from_)
    while hits:
        for hit in hits:
            # TODO: rejigger so we're not decoding/recoding/decoding JSON —
            # the caller (go) immediately json.loads() this again.
            yield json.dumps(hit['fields'])
        from_ += chunksize
        hits = get_next(uri, from_)
# Dispatch on the target: a local logstash output file (one JSON object
# per line) or an elasticsearch base URI to page through.
if os.path.isfile(target):
    with open(target) as fd:
        seconds = go(fd)
else:
    # Query ES for its results.
    seconds = go(elasticsearch(target))

# Write one CSV row per second, in chronological order: column 1 is a
# 1-based row index, column 2 is the event count for that second.
# NOTE(review): on Python 3, open(..., 'w', newline='') would avoid
# doubled line endings on Windows — confirm target interpreter first.
with open('logstash.csv', 'w') as fd:
    writer = csv.writer(fd)
    for index, key in enumerate(sorted(seconds.keys())):
        writer.writerow([str(index + 1), str(seconds[key])])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment