Skip to content

Instantly share code, notes, and snippets.

@nistrup
Last active April 21, 2023 10:38
Show Gist options
  • Star 39 You must be signed in to star a gist
  • Fork 18 You must be signed in to fork a gist
  • Save nistrup/1e724d6e450fd1da09a0782e6bfcd41a to your computer and use it in GitHub Desktop.
Save nistrup/1e724d6e450fd1da09a0782e6bfcd41a to your computer and use it in GitHub Desktop.
# IMPORTS
import pandas as pd
import math
import os.path
import time
from bitmex import bitmex
from binance.client import Client
from datetime import timedelta, datetime
from dateutil import parser
from tqdm import tqdm_notebook #(Optional, used for progress-bars)
### API
# Credentials are placeholders; replace with your own keys before running.
bitmex_api_key = '[REDACTED]' #Enter your own API-key here
bitmex_api_secret = '[REDACTED]' #Enter your own API-secret here
binance_api_key = '[REDACTED]' #Enter your own API-key here
binance_api_secret = '[REDACTED]' #Enter your own API-secret here
### CONSTANTS
# Candle interval name -> interval length in minutes; used to convert a
# time span into an expected number of candles.
binsizes = {"1m": 1, "5m": 5, "1h": 60, "1d": 1440}
# Max candles requested per BitMEX Trade.getBucketed call.
batch_size = 750
# Live (non-testnet) BitMEX REST client; constructing it does not hit the network yet.
bitmex_client = bitmex(test=False, api_key=bitmex_api_key, api_secret=bitmex_api_secret)
# Binance REST client used for both latest-candle lookups and history downloads.
binance_client = Client(api_key=binance_api_key, api_secret=binance_api_secret)
### FUNCTIONS
def minutes_of_new_data(symbol, kline_size, data, source):
    """Return the (oldest, newest) timestamps bounding the data still to fetch.

    Parameters
    ----------
    symbol : str
        Trading pair / instrument symbol, e.g. "BTCUSDT" or "XBTUSD".
    kline_size : str
        Candle interval key from ``binsizes`` ("1m", "5m", "1h", "1d").
    data : pandas.DataFrame
        Previously saved candles; if non-empty, downloading resumes from
        its last "timestamp" value.
    source : str
        Either "binance" or "bitmex".

    Returns
    -------
    (old, new)
        ``old`` — where the download should start; ``new`` — the exchange's
        most recent candle open time.

    Raises
    ------
    ValueError
        If ``source`` is not one of the supported exchanges (the original
        code silently left ``old``/``new`` unbound and crashed later).
    """
    if len(data) > 0:
        # Resume from the last row already on disk.
        old = parser.parse(data["timestamp"].iloc[-1])
    elif source == "binance":
        # Binance listed its first pairs in mid-2017; this start date
        # safely predates all available history.
        old = datetime.strptime('1 Jan 2017', '%d %b %Y')
    elif source == "bitmex":
        # Ask BitMEX for the very first bucketed candle of this symbol.
        old = bitmex_client.Trade.Trade_getBucketed(
            symbol=symbol, binSize=kline_size, count=1, reverse=False
        ).result()[0][0]['timestamp']
    else:
        raise ValueError("unknown source: %r" % (source,))

    if source == "binance":
        # Open time (ms since epoch) of the most recent candle.
        new = pd.to_datetime(
            binance_client.get_klines(symbol=symbol, interval=kline_size)[-1][0],
            unit='ms')
    elif source == "bitmex":
        # Most recent bucketed candle via reverse=True.
        new = bitmex_client.Trade.Trade_getBucketed(
            symbol=symbol, binSize=kline_size, count=1, reverse=True
        ).result()[0][0]['timestamp']
    else:
        raise ValueError("unknown source: %r" % (source,))
    return old, new
def get_all_binance(symbol, kline_size, save = False):
    """Download (or incrementally update) all Binance candles for a symbol.

    Reads any previously saved ``<symbol>-<kline_size>-data.csv``, fetches
    only the missing range, and returns the combined DataFrame indexed by
    timestamp.

    Parameters
    ----------
    symbol : str
        Binance trading pair, e.g. "BTCUSDT".
    kline_size : str
        Candle interval key from ``binsizes``.
    save : bool, optional
        If True, write the merged result back to the CSV cache.

    Returns
    -------
    pandas.DataFrame
        All candles, indexed by ``timestamp``.
    """
    filename = '%s-%s-data.csv' % (symbol, kline_size)
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
        # Parse cached timestamps so they compare/merge correctly with the
        # datetime timestamps of freshly downloaded rows (the original kept
        # them as strings, yielding a mixed-type index).
        data_df['timestamp'] = pd.to_datetime(data_df['timestamp'])
    else:
        data_df = pd.DataFrame()
    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source = "binance")
    delta_min = (newest_point - oldest_point).total_seconds() / 60
    available_data = math.ceil(delta_min / binsizes[kline_size])
    if oldest_point == datetime.strptime('1 Jan 2017', '%d %b %Y'):
        print('Downloading all available %s data for %s. Be patient..!' % (kline_size, symbol))
    else:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol, available_data, kline_size))
    klines = binance_client.get_historical_klines(
        symbol, kline_size,
        oldest_point.strftime("%d %b %Y %H:%M:%S"),
        newest_point.strftime("%d %b %Y %H:%M:%S"))
    data = pd.DataFrame(klines, columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore'])
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    if len(data_df) > 0:
        # DataFrame.append was removed in pandas 2.0 — concatenate instead.
        data_df = pd.concat([data_df, data], ignore_index=True)
    else:
        data_df = data
    data_df.set_index('timestamp', inplace=True)
    # The resume point's candle is downloaded again on incremental updates;
    # keep only the freshest copy of any duplicated timestamp.
    data_df = data_df[~data_df.index.duplicated(keep='last')]
    if save:
        data_df.to_csv(filename)
    print('All caught up..!')
    return data_df
def get_all_bitmex(symbol, kline_size, save = False):
    """Download (or incrementally update) all BitMEX candles for a symbol.

    Fetches in batches of ``batch_size`` candles, sleeping between requests
    to respect rate limits, and merges with any cached CSV.

    Parameters
    ----------
    symbol : str
        BitMEX instrument, e.g. "XBTUSD".
    kline_size : str
        Candle interval key from ``binsizes``.
    save : bool, optional
        If True and anything new was fetched, write the result to CSV.

    Returns
    -------
    pandas.DataFrame
        All candles, indexed by ``timestamp`` when any data exists.
    """
    filename = '%s-%s-data.csv' % (symbol, kline_size)
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()
    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source = "bitmex")
    delta_min = (newest_point - oldest_point).total_seconds() / 60
    available_data = math.ceil(delta_min / binsizes[kline_size])
    rounds = math.ceil(available_data / batch_size)
    if rounds > 0:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data in %d rounds.' % (delta_min, symbol, available_data, kline_size, rounds))
        # Collect every batch and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0, and appending inside
        # the loop re-copied the whole frame each round (quadratic).
        frames = [data_df] if len(data_df) > 0 else []
        for round_num in tqdm_notebook(range(rounds)):
            time.sleep(1)  # stay under BitMEX's request rate limit
            new_time = oldest_point + timedelta(minutes = round_num * batch_size * binsizes[kline_size])
            data = bitmex_client.Trade.Trade_getBucketed(symbol=symbol, binSize=kline_size, count=batch_size, startTime = new_time).result()[0]
            frames.append(pd.DataFrame(data))
        data_df = pd.concat(frames, ignore_index=True)
    # Guard: with no cache and nothing fetched the frame is empty and has
    # no 'timestamp' column — the original raised KeyError here.
    if 'timestamp' in data_df.columns:
        data_df.set_index('timestamp', inplace=True)
    if save and rounds > 0:
        data_df.to_csv(filename)
    print('All caught up..!')
    return data_df
@nkipa
Copy link

nkipa commented Aug 17, 2022

Getting this annoying message

C:\Users\Paul\anaconda3\lib\site-packages\dateparser\date_parser.py:35: PytzUsageWarning: The localize method is no longer necessary, as this time zone supports the fold attribute (PEP 495). For more details on migrating to a PEP 495-compliant implementation, see https://pytz-deprecation-shim.readthedocs.io/en/latest/migration.html
date_obj = stz.localize(date_obj)

@MiguelAngelMikeMeza
Copy link

Thanks, bro — it's amazing

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment