Skip to content

Instantly share code, notes, and snippets.

@jinhwanlazy
Last active April 12, 2018 00:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jinhwanlazy/03db4ffce613ea59b82867b06095ad25 to your computer and use it in GitHub Desktop.
Save jinhwanlazy/03db4ffce613ea59b82867b06095ad25 to your computer and use it in GitHub Desktop.
collect trade history from poloniex exchange
import httplib2
import pandas as pd
import simplejson as json
from datetime import datetime
from datetime import timedelta
def get_trade_history_(currency_pair='USDT_BTC', start=1494000000, end=1600000000):
    """
    Simply query trade history in given range.

    Issues one GET request to the Poloniex public ``returnTradeHistory``
    command and loads the decoded JSON body into a DataFrame.

    Args:
        currency_pair: value sent as the ``currencyPair`` query parameter.
        start: value sent as the ``start`` query parameter (the caller in
            this file passes UNIX timestamps in seconds).
        end: value sent as the ``end`` query parameter.

    Returns:
        pandas.DataFrame built from the decoded JSON payload.

    Raises:
        ValueError: if the server answers with an HTTP error status, or if
            the payload is a JSON object carrying an ``error`` key.
    """
    url = 'https://poloniex.com/public?command=returnTradeHistory&currencyPair={}&start={}&end={}'.format(currency_pair, start, end)
    http = httplib2.Http()
    response, content = http.request(url, 'GET')

    # Fail early with a readable message instead of letting an error page
    # reach the JSON parser. ValueError is kept because that is what the
    # parser / DataFrame constructor would have raised for such replies.
    if response.status >= 400:
        raise ValueError(
            'returnTradeHistory request failed with HTTP status {}: {}'.format(
                response.status, url))

    payload = json.loads(content)

    # NOTE(review): a dict with an "error" key is presumably how the API
    # reports failures (e.g. an invalid pair) -- confirm against the API docs.
    # Passing such a dict to DataFrame() only produces an unrelated
    # "scalar values" ValueError, so report the server message instead.
    if isinstance(payload, dict) and 'error' in payload:
        raise ValueError(
            'returnTradeHistory returned an error: {}'.format(payload['error']))

    return pd.DataFrame(payload)
def get_trade_history(currency_pair='USDT_BTC'):
    """
    Sequentially collect the whole trade history.

    Previously collected trades are loaded from the cache file
    ``poloniex_<currency_pair>.pkl.gz`` (an empty frame is used when it
    cannot be read). The function then keeps calling
    ``get_trade_history_``: first for trades newer than the newest cached
    one, until that one is less than an hour old, then backwards in 24-day
    windows until a trade with ``tradeID`` 1 is present. Collection also
    stops when a request brings in nothing new. The merged data is written
    back to the cache file and returned.

    TODO. parallel access?

    Args:
        currency_pair: market symbol forwarded to ``get_trade_history_`` and
            used in the cache file name.

    Returns:
        pandas.DataFrame of all collected trades, sorted by ``tradeID``
        (empty if nothing could be collected).
    """
    # Local import: the module itself only imports datetime and timedelta.
    from datetime import timezone

    def timestamp(date):
        """Convert a 'YYYY-MM-DD HH:MM:SS' trade date to epoch seconds."""
        dt = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
        # The trade dates are taken to be UTC (NOTE(review): confirm against
        # the API docs). The previous "+9 hours, then naive .timestamp()"
        # only gave the right epoch on a machine whose local zone is UTC+9;
        # attaching UTC explicitly gives the same value on any machine.
        return dt.replace(tzinfo=timezone.utc).timestamp()

    def nxt_range(data, catch_up=True):
        """Return the next (start, end) window to request, or (None, None)."""
        interval = timedelta(days=24).total_seconds()
        now = datetime.now().timestamp()
        if data.empty:
            # Nothing cached yet: start with the most recent window.
            return now - interval, now
        max_t = timestamp(data.date.max())
        min_t = timestamp(data.date.min())
        if catch_up and now - max_t > timedelta(hours=1).total_seconds():
            # Newest trade is stale: fetch forward, overlapping 10 seconds
            # (duplicates are dropped after merging).
            return max_t - 10, now
        if 1 not in data.tradeID.values:
            # First trade not reached yet: walk one window further back.
            return min_t - interval, min_t + 10
        return None, None

    def crawl(data, start, end):
        """Fetch one window and merge it into ``data``."""
        new = get_trade_history_(currency_pair, start, end)
        if new.empty:
            # An empty reply has no 'tradeID' column to sort by.
            return data
        data = pd.concat([data, new]).drop_duplicates()
        data = data.sort_values(by='tradeID').reset_index(drop=True)
        return data

    cache = "poloniex_{}.pkl.gz".format(currency_pair)
    try:
        # The cache is a pickle: only load files this script wrote itself.
        data = pd.read_pickle(cache)
    except Exception:
        # Best effort: a missing or unreadable cache means starting from
        # scratch. (Not a bare except, so Ctrl-C / SystemExit still work.)
        data = pd.DataFrame()
    print(data)

    catch_up = True
    start, end = nxt_range(data, catch_up)
    while start is not None:
        print('seek', start, end)
        known = len(data)
        data = crawl(data, start, end)
        if len(data) > known:
            print('now', data.tradeID.min(), data.tradeID.max())
        elif catch_up and not data.empty:
            # No new trades. Without a change the same window would be
            # requested forever, so stop asking for newer trades and let the
            # next pass walk backwards only. If the stalled request already
            # was a backward one, that pass repeats it once and then stops.
            catch_up = False
        else:
            # Nothing new in either direction (or nothing at all): give up.
            break
        start, end = nxt_range(data, catch_up)

    if not data.empty:
        fill_gaps(data)
    data.to_pickle(cache)
    return data
def fill_gaps(df):
    """
    Finds any non continuous data, and fill the gaps.

    Not implemented yet: for now it only prints the ``date`` of every row
    whose ``tradeID`` is not exactly one more than the previous row's.

    Args:
        df: DataFrame with ``tradeID`` and ``date`` columns. Rows are
            compared in their current order. An empty frame is accepted.

    Returns:
        None.
    """
    if df.empty:
        # An empty frame may have no columns at all, so do not touch
        # df.tradeID / df.date in that case.
        ts = []
    else:
        jumps = df.tradeID - df.tradeID.shift() != 1
        # The first row has no predecessor (NaN difference), so it is always
        # flagged; drop it from the report.
        ts = list(df.loc[jumps].date)[1:]
    print(ts)
    # TODO: request the missing ranges once gap filling is implemented.
if __name__ == "__main__":
    # Script entry point: collect (or refresh) the USDT_BTC history and
    # print the resulting frame.
    history = get_trade_history("USDT_BTC")
    print(history)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment