Skip to content

Instantly share code, notes, and snippets.

@davidandrzej davidandrzej/pandasumo.py Secret
Last active Sep 18, 2017

Embed
What would you like to do?
Query the Sumo Logic Metrics API and load the results into a pandas DataFrame
import functools
import pdb
import requests
import json
import pandas as pd
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
# TO RUN: define `user` and `pw` (runQuery passes them to requests.post as
# auth=(user, pw)) and set `query` to the metrics query to execute. Neither
# `user` nor `pw` is assigned in the visible part of this script.
query='metricsstore ingest_rate m1_rate | sum'
# Placeholder endpoint config: the URL that runQuery POSTs the query body to.
endpoint = 'https://api.sumologic.com/api/v1/metrics/results'
# Dummy time window: the one hour ending now (-1h).
# datetime.now() is called without a tz, so both values are naive datetimes.
endTime = datetime.now()
startTime = endTime - timedelta(hours=1)
def simpleSeriesName(dims):
    """Represent a map of metric dimensions as one simple string.

    Simple policy: when a '_rawName' dimension is present, the identifier is
    "<rowId>-<_rawName>". Otherwise every dimension except 'rowId' is
    rendered as "key:value", the pairs are joined with "::", and the result
    is prefixed with "<rowId>-".
    """
    if '_rawName' not in dims:
        # No raw name available: spell out all remaining dimensions.
        pairs = []
        for key, value in dims.items():
            if key == 'rowId':
                continue
            pairs.append('%s:%s' % (key, value))
        return '%s-%s' % (dims['rowId'], '::'.join(pairs))
    return '%s-%s' % (dims['rowId'], dims['_rawName'])
def exampleQuery(user, pw, query):
    """Run `query` over the module-level time window and return a DataFrame.

    The request body comes from makeQuery (using the module-level startTime
    and endTime), is POSTed to the module-level endpoint by runQuery with
    (user, pw) as credentials, and the HTTP result is converted by
    convertResults with simpleSeriesName naming the columns.
    """
    requestBody = makeQuery(query, startTime, endTime)
    httpResult = runQuery(endpoint, user, pw, requestBody)
    frame = convertResults(simpleSeriesName, httpResult)
    return frame
#
# Helpers to construct and run the query
#
def dtToEpochMillis(dt):
    """Convert a datetime to whole milliseconds since the Unix epoch.

    Args:
        dt: a datetime.datetime. Naive values are interpreted as local time
            (the behaviour of datetime.timestamp()); aware values use their
            own UTC offset.

    Returns:
        int: epoch milliseconds; precision finer than one millisecond is
        dropped (rounded down).
    """
    # Multiplying the float from dt.timestamp() by 1000 and truncating can
    # land one millisecond low (1.001 * 1000 == 1000.9999999999999). Take the
    # whole seconds and the millisecond part separately and combine them with
    # integer arithmetic instead.
    wholeSeconds = int(dt.replace(microsecond=0).timestamp())
    return wholeSeconds * 1000 + dt.microsecond // 1000
def makeQuery(query, startTime, endTime):
    """Build the request body (a plain dict) for a single metrics query.

    The query string is sent as one row with rowId "A"; startTime and
    endTime are converted to epoch milliseconds with dtToEpochMillis. The
    data-point hints are fixed at 600 requested / 800 maximum.
    """
    row = {"query": query, "rowId": "A"}
    body = {"query": [row]}
    body["startTime"] = dtToEpochMillis(startTime)
    body["endTime"] = dtToEpochMillis(endTime)
    body["requestedDataPoints"] = 600
    body["maxDataPoints"] = 800
    return body
def runQuery(endpoint, user, pw, queryJson, timeout=60):
    """POST the query body to the metrics endpoint and return the response.

    Args:
        endpoint: URL to POST to.
        user: first element of the `auth` tuple handed to requests.
        pw: second element of the `auth` tuple handed to requests.
        queryJson: request body, sent through requests' `json=` parameter.
        timeout: seconds requests waits on the server before raising a
            timeout error. Pass None to wait indefinitely, which is what
            happened before this parameter existed.

    Returns:
        The requests response object, unmodified. No status check is
        performed here.
    """
    # Without a timeout, requests can block forever on an unresponsive host.
    return requests.post(endpoint,
                         auth=(user, pw),
                         json=queryJson,
                         timeout=timeout)
#
# Re-structure the results into a pandas DataFrame
#
def convertDatapoints(series):
    """Convert one result series to a pandas Series indexed by timestamp.

    Reads series['datapoints'], which carries two parallel lists:
      value:     [float], actual values
      timestamp: [int], epoch millis
    The timestamps are parsed with pd.to_datetime(..., unit='ms') and used
    as the index of the returned Series.
    """
    points = series['datapoints']
    values = points['value']
    stamps = pd.to_datetime(points['timestamp'], unit='ms')
    return pd.Series(values, index=stamps)
def convertDimensions(rowId, dims):
    """Collapse a list of dimension entries into one flat dict.

    INPUT  (rowId, [{key : foo, value : buzz}, ...])
    OUTPUT {key : value}, plus a "rowId" entry set to `rowId`. That entry is
           written last, so it replaces any dimension keyed "rowId".
    """
    flat = {entry['key']: entry['value'] for entry in dims}
    flat["rowId"] = rowId
    return flat
def convertResponse(nameBuilder, response):
    """Build a dict of pandas Series for a single row (eg, rowId='A').

    If the response carries a 'message' entry, that message is printed and
    an empty dict is returned. Otherwise each entry of response['results']
    becomes one item: the key is nameBuilder applied to the series'
    dimensions (merged with the row id by convertDimensions), and the value
    is the Series produced by convertDatapoints.
    """
    if 'message' in response:
        print(response['message'])
        return {}

    seriesByName = {}
    for series in response['results']:
        dims = convertDimensions(response['rowId'],
                                 series['metric']['dimensions'])
        # Resolve the name before converting the datapoints.
        name = nameBuilder(dims)
        seriesByName[name] = convertDatapoints(series)
    return seriesByName
def convertResults(nameBuilder, results):
    """Given HTTP call results, return a pandas DataFrame.

    -index   = DatetimeIndex
    -columns = each individual metric

    `results` must expose .json(); every entry of its 'response' list is
    converted by convertResponse and the resulting dicts are merged. When
    two entries produce the same series name, the later one wins.
    """
    merged = {
        name: column
        for response in results.json()['response']
        for name, column in convertResponse(nameBuilder, response).items()
    }
    return pd.DataFrame(merged)
# Run the example query as soon as the script executes. `user` and `pw` are
# not defined in the visible part of this script; they must be assigned before
# this line, otherwise it raises NameError.
df = exampleQuery(user, pw, query)
# Optional follow-ups, left disabled: back-fill gaps, plot, or save to CSV.
# dff = df.fillna(method='backfill')
# dff.plot()
# plt.show()
# df.to_csv("mydata.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.