Created
January 15, 2014 19:32
-
-
Save jcorbin/8442844 to your computer and use it in GitHub Desktop.
Simple functions for converting raw Graphite data into a pandas time series or data frame. Reasonably fast, too: my test set was a 16 MiB dump of 350 metrics, and parsing it takes ~400 ms on my machine.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try:
    from cStringIO import StringIO  # Python 2: fast C implementation
except ImportError:
    from io import StringIO  # Python 3: cStringIO no longer exists

import numpy as np
import pandas as pd
def parse_raw_graphite_metric(line):
    """Parse one line of raw Graphite data into a pandas Series.

    A raw line has the form ``name,start,end,step|v1,v2,...`` where
    ``start`` and ``end`` are epoch seconds, ``step`` is the sample interval
    in whole seconds, and each value is a number or the literal ``None``.

    Args:
        line: A single raw metric line, with or without a trailing newline.

    Returns:
        A float ``pd.Series`` named after the metric and indexed by a UTC
        ``DatetimeIndex`` that starts at ``start`` and advances by ``step``
        seconds per sample. ``None`` samples become NaN. A line with no
        samples after the ``|`` yields an empty series.

    Raises:
        ValueError: If the line has no ``|`` separator, the header does not
            end in three comma-separated numeric fields, a sample is neither
            a number nor ``None``, or there are more samples than time
            stamps between ``start`` and ``end``.
    """
    head, body = line.rstrip('\r\n').split('|', 1)
    # Split from the right: the target name may itself contain commas
    # (e.g. ``sumSeries(a,b)``), but the three trailing fields never do.
    name, since, until, step = head.rsplit(',', 3)
    start = pd.Timestamp(float(since), unit='s', tz='UTC')
    end = pd.Timestamp(float(until), unit='s', tz='UTC')

    # ''.split(',') would give [''], which is not a valid sample.
    samples = body.split(',') if body else []

    # date_range includes ``end``, so it can hold more stamps than there are
    # samples; trim it to the data actually present.
    index = pd.date_range(start=start, end=end, freq='%ds' % int(step))
    index = index[:len(samples)]

    values = np.array(
        [np.nan if sample == 'None' else float(sample) for sample in samples],
        dtype=float,
    )
    return pd.Series(data=values, index=index, name=name)
def parse_raw_graphite(content):
    """Parse a multi-line raw Graphite dump into a pandas DataFrame.

    Args:
        content: Text holding one raw metric line per line, each in the
            form accepted by ``parse_raw_graphite_metric``.

    Returns:
        A ``pd.DataFrame`` with one column per metric, in input order and
        labelled with the metric names. Returns an empty frame when
        ``content`` holds no metric lines.
    """
    # Iterating a StringIO was the fastest way to split lines in testing on
    # a 16MiB, 350-metric dump:
    # - str.split ~ 11ms
    # - custom lazy split ~ 9ms
    # - cStringIO ~ 6ms
    series = [
        parse_raw_graphite_metric(line)
        for line in StringIO(content)
        if line.strip()  # skip stray blank lines instead of crashing on them
    ]
    if not series:
        # pd.concat refuses an empty list, so return an empty frame directly.
        return pd.DataFrame()
    # DataFrame.from_items was removed in pandas 1.0; concatenating along the
    # column axis with explicit keys builds the same name -> series frame.
    return pd.concat(series, axis=1, keys=[s.name for s in series])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment