Skip to content

Instantly share code, notes, and snippets.

@maks-ivanov
Created March 17, 2019 22:11
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save maks-ivanov/e668c47addfa69e86da5a44e3f634dd5 to your computer and use it in GitHub Desktop.
Save maks-ivanov/e668c47addfa69e86da5a44e3f634dd5 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from datetime import datetime
from fastparquet import write
def compute_vwap(df):
    """Return a copy of *df* with a constant ``vwap`` column added.

    The volume-weighted average price is ``sum(price * foreignNotional) /
    sum(foreignNotional)`` computed over all rows of *df*; the same scalar
    is written to every row of the new ``vwap`` column.

    Args:
        df: DataFrame with ``price`` and ``foreignNotional`` columns.

    Returns:
        A new DataFrame equal to *df* plus the ``vwap`` column. The input
        frame is left untouched, which keeps the function safe to use
        inside ``groupby(...).apply``.
    """
    q = df['foreignNotional']
    p = df['price']
    total_volume = q.sum()
    # With zero total volume the average is undefined; return NaN explicitly
    # instead of relying on a 0/0 division (and its RuntimeWarning).
    vwap = (p * q).sum() / total_volume if total_volume else np.nan
    # assign() builds a new frame rather than writing into the caller's data.
    return df.assign(vwap=vwap)
def ohlc(df):
    """Collapse *df* into a single-row bar carrying open/high/low/close.

    The returned row is the last row of *df* (all of its original columns
    are kept) with four extra columns derived from the ``price`` column:
    ``open`` (first price), ``high`` (max), ``low`` (min) and ``close``
    (last price).

    Args:
        df: DataFrame with a ``price`` column, rows in the order that
            defines "first" and "last".

    Returns:
        A new one-row DataFrame; *df* itself is not modified.

    Raises:
        IndexError: if *df* has no rows.
    """
    prices = df['price']
    # Slice the last row first and attach the four scalars to that copy,
    # instead of broadcasting them over every row only to discard all but one.
    return df.iloc[-1:].assign(
        open=prices.iloc[0],
        high=prices.max(),
        low=prices.min(),
        close=prices.iloc[-1],
    )
# Load the daily trade dumps and keep only the XBTUSD rows of each file.
paths = ['data/20181205.csv','data/20181206.csv','data/20181207.csv', 'data/20181208.csv', 'data/20181209.csv']
frames = []
for path in ['data/20181204.csv'] + paths:
    df = pd.read_csv(path)
    frames.append(df[df.symbol == 'XBTUSD'])
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated data on every iteration; concatenate once instead.
data = pd.concat(frames)

# timestamp parsing: the last three characters of each string are dropped
# before parsing, then the rest is parsed in one vectorized call.
# NOTE(review): presumably the dropped characters are sub-microsecond digits
# -- confirm against the CSV format.
data['timestamp'] = pd.to_datetime(data['timestamp'].str[:-3], format="%Y-%m-%dD%H:%M:%S.%f")
data.set_index('timestamp', inplace=True)
# A stable sort keeps trades with identical timestamps in file order, so the
# cumulative sum below (and therefore the bar boundaries) is deterministic.
data.sort_index(inplace=True, kind='mergesort')

# Running total of traded notional; a new bar starts every `dollars_per_bar`.
data_cm_dollar = data.assign(cmDollar=data['foreignNotional'].cumsum())
total_dollars = data_cm_dollar.cmDollar.values[-1]
dollars_per_bar = 2e6
print('Total dollars:', total_dollars)
print('Dollars per bar:', dollars_per_bar)

# grpId is the index of the bar each trade falls into.
data_dollar_grp = data_cm_dollar.assign(grpId=data_cm_dollar['cmDollar'] // dollars_per_bar)
print('Number of dollar bars:', data_dollar_grp['grpId'].nunique())

# One row per bar: the bar's last trade plus its vwap and open/high/low/close.
data_dollar_ohlc = data_dollar_grp.groupby('grpId').apply(lambda x: ohlc(compute_vwap(x)))
# apply() prepends grpId as an outer index level; drop it to index by timestamp.
data_dollar_ohlc.index = data_dollar_ohlc.index.droplevel()
# Bars that end on the same timestamp would collide; keep the first of each.
data_dollar_ohlc = data_dollar_ohlc[~data_dollar_ohlc.index.duplicated(keep='first')]

# save to file
write('data_dollar_ohlc.pq', data_dollar_ohlc)
@llj098
Copy link

llj098 commented Aug 5, 2019

line 30:

data['timestamp'] = data.timestamp.map(lambda t: datetime.strptime(t[:-3], "%Y-%m-%dD%H:%M:%S.%f")) # timestamp parsing

seems useless?

@maks-ivanov
Copy link
Author

maks-ivanov commented Aug 22, 2019

line 30:

data['timestamp'] = data.timestamp.map(lambda t: datetime.strptime(t[:-3], "%Y-%m-%dD%H:%M:%S.%f")) # timestamp parsing

seems useless?

How so? Pandas reads the column as a string, and you want to manipulate it as a datetime object later. This line does the conversion.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment