Created
August 22, 2019 13:51
-
-
Save stewartm888/cc09abc91f45b083b3181a8a72d8ed0d to your computer and use it in GitHub Desktop.
dist_extract.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
| |
import os | |
import glob | |
import argparse | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
| |
from btrdb.utils.timez import * | |
| |
| |
DATA = "data/*.csv.gz"


def load_data(path=DATA, limit=None, deltas=False):
    """Yield ``(name, frame)`` pairs for each gzipped CSV matching *path*.

    Args:
        path: glob pattern selecting the files to read.
        limit: maximum number of files to read; ``None`` reads every match.
        deltas: when true, yield ``df.diff()`` (row-to-row differences)
            instead of the frame as loaded.

    Yields:
        Tuples of the file's base name (with a trailing ``.csv.gz`` removed)
        and the loaded, or differenced, ``pandas.DataFrame``.
    """
    suffix = ".csv.gz"

    # glob returns matches in arbitrary order; sorting makes the subset
    # selected by ``limit`` reproducible between runs.
    for idx, fpath in enumerate(sorted(glob.glob(path))):
        if limit is not None and idx >= limit:
            break

        name = os.path.basename(fpath)
        # Remove the exact suffix. str.rstrip(".csv.gz") would strip any
        # trailing run of those *characters*, e.g. "stats.csv.gz" -> "stat".
        if name.endswith(suffix):
            name = name[:-len(suffix)]

        df = pd.read_csv(fpath, compression="gzip")
        yield name, (df.diff() if deltas else df)
| |
| |
def counts(args):
    """Write per-column non-null counts for every dataset to a CSV file.

    Each dataset yielded by ``load_data`` contributes one row per column,
    holding that column's ``DataFrame.count()`` value and the dataset name
    (stored in a column labelled ``device``). The rows are written without
    a header line.

    Args:
        args: parsed command-line namespace; ``args.limit`` caps the number
            of datasets read and ``args.outpath`` is the destination file.

    Raises:
        ValueError: if ``load_data`` yields no datasets.
    """
    frames = []
    for fname, df in load_data(limit=args.limit):
        frame = df.count().to_frame("count")
        frame["device"] = fname
        frames.append(frame)

    # Fail with a readable message instead of calling .to_csv on None.
    if not frames:
        raise ValueError("no datasets found to count")

    # Concatenate once at the end rather than re-copying the accumulated
    # frame on every iteration.
    pd.concat(frames, axis=0).to_csv(args.outpath, header=False)
| |
| |
def dist(args):
    """Write summary statistics of row-to-row deltas for every dataset.

    Each dataset is loaded with ``deltas=True`` and summarised with
    ``DataFrame.describe()``; the summary is transposed so that every
    described column becomes one output row. All summaries are stacked and
    written to ``args.outpath`` with a header line.

    Args:
        args: parsed command-line namespace; ``args.limit`` caps the number
            of datasets read and ``args.outpath`` is the destination file.

    Raises:
        ValueError: if ``load_data`` yields no datasets.
    """
    # NOTE(review): unlike ``counts``, the dataset name is not recorded in
    # the output rows -- confirm whether that is intentional.
    summaries = [
        df.describe().transpose()
        for _, df in load_data(limit=args.limit, deltas=True)
    ]

    # Fail with a readable message instead of calling .to_csv on None.
    if not summaries:
        raise ValueError("no datasets found to describe")

    # Concatenate once at the end rather than re-copying the accumulated
    # frame on every iteration.
    pd.concat(summaries, axis=0).to_csv(args.outpath, header=True)
| |
| |
def main(args):
    """Fallback handler used when no sub-command was selected.

    Args:
        args: parsed command-line namespace (unused).

    Raises:
        Exception: always, asking the user to specify a command.
    """
    message = "please specify a command"
    raise Exception(message)
| |
| |
if __name__ == "__main__":
    # Options registered on the top-level parser. Keys are tuples of flag
    # names (or a single string); values are add_argument keyword arguments.
    global_args = {
        ("-l", "--limit"): {
            "type": int, "metavar": "N", "default": None,
            "help": "limit the number of datasets read",
        },
    }

    # Sub commands: the "func" entry is the handler to dispatch to, every
    # other entry is an option specification in the same form as above.
    cmds = {
        "count": {
            "func": counts,
            ("-o", "--outpath"): {
                "type": str, "metavar": "FILE", "default": "counts.csv.gz",
                "help": "location to write the counts output file",
            },
        },
        "dist": {
            "func": dist,
            ("-o", "--outpath"): {
                "type": str, "metavar": "FILE", "default": "delta_dist.csv.gz",
                "help": "location to write the delta distribution output file",
            },
        },
    }

    parser = argparse.ArgumentParser(description="wrangle timestamps for jitter analysis")
    # With no sub-command given, ``main`` runs and reports the omission.
    parser.set_defaults(func=main)
    subparsers = parser.add_subparsers(help="wrangling commands")

    for pargs, kwargs in global_args.items():
        if isinstance(pargs, str):
            pargs = (pargs,)
        parser.add_argument(*pargs, **kwargs)

    for cmd, cargs in cmds.items():
        sp = subparsers.add_parser(cmd)
        # Pop the handler so that only option specifications remain.
        sp.set_defaults(func=cargs.pop("func"))
        for pargs, kwargs in cargs.items():
            if isinstance(pargs, str):
                pargs = (pargs,)
            sp.add_argument(*pargs, **kwargs)

    args = parser.parse_args()
    try:
        args.func(args)
    except Exception as e:
        # Top-level boundary: report any failure as a usage error.
        parser.error(str(e))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment