Skip to content

Instantly share code, notes, and snippets.

@davidandrzej
Last active September 18, 2017 19:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidandrzej/60df21aaad0c868456ce422fb56d3e09 to your computer and use it in GitHub Desktop.
Save davidandrzej/60df21aaad0c868456ce422fb56d3e09 to your computer and use it in GitHub Desktop.
Query Sumo Metrics API, put results in Pandas Dataframe
import functools
import pdb
import requests
import json
import pandas as pd
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
# TO RUN - SET / PROVIDE (user,pw,query)
query = 'metricsstore ingest_rate m1_rate | sum'

# Placeholder endpoint config
endpoint = 'https://api.sumologic.com/api/v1/metrics/results'

# Dummy time range: the hour leading up to "now" (naive local time,
# captured once when this module is executed).
_QUERY_WINDOW = timedelta(hours=1)
endTime = datetime.now()
startTime = endTime - _QUERY_WINDOW
# How to represent a map of metrics dimensions as a simple string?
# Simple policy: just use "rowId-_rawName" as metric identifier
def simpleSeriesName(dims):
    """Build a flat string identifier for one metric series.

    Args:
        dims: mapping of dimension name to value; must contain ``'rowId'``.

    Returns:
        ``'<rowId>-<_rawName>'`` when ``dims`` has a ``'_rawName'`` entry.
        Otherwise ``'<rowId>-'`` followed by every other dimension rendered
        as ``key:value`` and joined with ``'::'``, in the mapping's
        iteration order.

    Raises:
        KeyError: if ``dims`` has no ``'rowId'`` entry.
    """
    if '_rawName' in dims:
        return '%s-%s' % (dims['rowId'], dims['_rawName'])
    # No raw name available: fall back to spelling out all dimensions
    # except rowId, which is already used as the prefix.
    dimRep = '::'.join('%s:%s' % (k, v)
                       for (k, v) in dims.items()
                       if k != 'rowId')
    return '%s-%s' % (dims['rowId'], dimRep)
def exampleQuery(user, pw, query):
    """Run ``query`` over the module-level time range and return a DataFrame.

    Builds the request body with ``makeQuery`` using the module-level
    ``startTime``/``endTime``, posts it to the module-level ``endpoint``
    with ``runQuery``, and converts the response with ``convertResults``,
    naming columns via ``simpleSeriesName``.

    Args:
        user: first element of the auth pair passed to ``runQuery``.
        pw: second element of the auth pair passed to ``runQuery``.
        query: metrics query string.

    Returns:
        The pandas DataFrame produced by ``convertResults``.
    """
    constructed = makeQuery(query, startTime, endTime)
    results = runQuery(endpoint, user, pw, constructed)
    return convertResults(simpleSeriesName, results)
#
# Helpers to construct and run the query
#
def dtToEpochMillis(dt):
    """Convert a ``datetime`` to integer milliseconds since the Unix epoch.

    The whole-second part and the sub-second part are combined with integer
    arithmetic. Multiplying the float ``dt.timestamp()`` by 1000 and
    truncating can land one millisecond low because of binary rounding
    (e.g. 1.001 s * 1000 -> 1000.9999999999999 -> 1000).

    Args:
        dt: a ``datetime``; naive values are interpreted by
            ``datetime.timestamp()`` (i.e. as local time).

    Returns:
        int: epoch milliseconds, with sub-millisecond precision dropped.
    """
    wholeSeconds = int(dt.replace(microsecond=0).timestamp())
    return wholeSeconds * 1000 + dt.microsecond // 1000
def makeQuery(query, startTime, endTime, rowId="A",
              requestedDataPoints=600, maxDataPoints=800):
    """Build the JSON-serializable request body for a single metrics query.

    Args:
        query: metrics query string.
        startTime: ``datetime`` for the start of the range; converted with
            ``dtToEpochMillis``.
        endTime: ``datetime`` for the end of the range; converted with
            ``dtToEpochMillis``.
        rowId: identifier attached to the query row (default ``"A"``).
        requestedDataPoints: value sent as ``requestedDataPoints``
            (default 600).
        maxDataPoints: value sent as ``maxDataPoints`` (default 800).

    Returns:
        dict with keys ``query``, ``startTime``, ``endTime``,
        ``requestedDataPoints`` and ``maxDataPoints``.
    """
    return {
        "query": [{"query": query, "rowId": rowId}],
        "startTime": dtToEpochMillis(startTime),
        "endTime": dtToEpochMillis(endTime),
        "requestedDataPoints": requestedDataPoints,
        "maxDataPoints": maxDataPoints,
    }
def runQuery(endpoint, user, pw, queryJson, timeout=60):
    """POST ``queryJson`` to ``endpoint`` using ``(user, pw)`` as auth.

    Args:
        endpoint: URL to post to.
        user: first element of the auth pair.
        pw: second element of the auth pair.
        queryJson: JSON-serializable request body (sent via ``json=``).
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely; pass ``None`` to
            restore that behavior.

    Returns:
        The ``requests`` response object. The HTTP status is not checked
        here.
    """
    return requests.post(endpoint,
                         auth=(user, pw),
                         json=queryJson,
                         timeout=timeout)
#
# Re-structure the results into a pandas DataFrame
#
# Convert to a nice pandas Series with DatetimeIndex
def convertDatapoints(series):
    """Convert one series' datapoints into a time-indexed pandas Series.

    Args:
        series: mapping with a ``'datapoints'`` entry holding two parallel
            lists: ``'value'`` (the actual values) and ``'timestamp'``
            (epoch milliseconds).

    Returns:
        ``pd.Series`` of the values, indexed by the timestamps parsed with
        ``pd.to_datetime(..., unit='ms')``.
    """
    # value: [float], actual values
    # timestamp: [int], epoch millis
    dps = series['datapoints']
    timeIndex = pd.to_datetime(dps['timestamp'], unit='ms')
    return pd.Series(dps['value'], index=timeIndex)
def convertDimensions(rowId, dims):
    """Flatten a list of key/value records into one dict, adding ``rowId``.

    Args:
        rowId: identifier stored under the ``"rowId"`` key of the result.
        dims: iterable of mappings shaped like
            ``{'key': foo, 'value': buzz}``.

    Returns:
        dict mapping each record's ``'key'`` to its ``'value'``, plus
        ``"rowId"``. The ``rowId`` argument wins over any input record
        whose key is also ``"rowId"``.
    """
    converted = {d['key']: d['value'] for d in dims}
    converted["rowId"] = rowId
    return converted
# Build a dict of pandas Series for a single row (eg, rowId='A')
def convertResponse(nameBuilder, response):
    """Build a dict of pandas Series for a single response row.

    Args:
        nameBuilder: callable taking the dict produced by
            ``convertDimensions`` and returning the key to store the
            series under.
        response: mapping for one row. If it contains ``'message'``, the
            message is printed and nothing is converted; otherwise it must
            provide ``'rowId'`` and ``'results'``, where each result has
            ``['metric']['dimensions']`` and the datapoints consumed by
            ``convertDatapoints``.

    Returns:
        dict mapping series name to ``pd.Series``; empty when the response
        carries a ``'message'``. Series that produce the same name
        overwrite each other (last one wins).
    """
    if 'message' in response:
        print(response['message'])
        return {}
    return {
        nameBuilder(convertDimensions(response['rowId'],
                                      series['metric']['dimensions'])):
            convertDatapoints(series)
        for series in response['results']
    }
# Given HTTP call results, return pandas DataFrame
# -index = DatetimeIndex
# -columns = each individual metric
def convertResults(nameBuilder, results):
    """Turn the HTTP call results into a single pandas DataFrame.

    Args:
        nameBuilder: callable forwarded to ``convertResponse`` to name each
            series (column).
        results: object whose ``json()`` returns a mapping with a
            ``'response'`` list; each element is handled by
            ``convertResponse``.

    Returns:
        ``pd.DataFrame`` built from the merged ``{name: Series}`` dicts of
        all responses, with one column per series name. If two responses
        produce the same name, the later one replaces the earlier.
    """
    converted = {}
    for response in results.json()['response']:
        converted.update(convertResponse(nameBuilder, response))
    return pd.DataFrame(converted)
# Script entry point: runs the example query as soon as this file executes.
# NOTE(review): `user` and `pw` are not assigned anywhere in the visible
# file; they have to be defined before this line or it raises NameError.
df = exampleQuery(user, pw, query)
# Optional post-processing / plotting / export, left disabled:
# dff = df.fillna(method='backfill')
# dff.plot()
# plt.show()
# df.to_csv("mydata.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment