Skip to content

Instantly share code, notes, and snippets.

@racinger
racinger / migration_pod_to_serverless_pinecone.ipynb
Last active April 12, 2024 05:44
A simple script to migrate data from a pod-based architecture to a serverless architecture in Pinecone.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Fill missing (NaN) values by forward-filling: each gap takes the last valid
# value that precedes it. DataFrame.ffill is used because the equivalent
# fillna(method='ffill') spelling is deprecated in recent pandas releases.
data.ffill(inplace=True)
# Replace zero values in the 'volume' column with a very small number (1e-10).
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on data['volume']: that form is chained
# assignment, which acts on a temporary object and leaves `data` untouched
# when pandas Copy-on-Write is enabled.
data['volume'] = data['volume'].replace([0, 0.0], 1e-10)
# Load the CSV saved for this pair into a DataFrame, using its "date" column
# as the index. The file name embeds the part of the symbol before the '/'.
symbol = 'BTC/USDT'
data = pd.read_csv(
    'data_since6months_freq1h' + symbol.partition('/')[0] + '.csv',
    index_col="date",
)
# Safety net against bad data: convert the index to a DatetimeIndex, then
# keep only the first row for every timestamp that occurs more than once.
data.index = pd.DatetimeIndex(data.index)
data = data.loc[~data.index.duplicated(keep='first')]
# Reindex onto a complete hourly range running from the first to the last
# timestamp, so that any missing hour shows up as a row of NaN values.
data = data.reindex(
    index=pd.date_range(data.index[0], data.index[-1], freq='1h'),
)
# Install pandas with `pip install pandas`; it is an ideal library for manipulating our dataset
import pandas as pd
# Trading pair whose candles are requested below
symbol = 'BTC/USDT'
print(symbol)
# Empty DataFrame created before the loop (presumably the accumulator for the
# fetched candles; the code that fills it is not visible here - TODO confirm)
ohlcv_dataframe = pd.DataFrame()
# Step through the history in 600-hour strides: 4320, 3720, ..., 120 hours ago
for hours in range(4320,0,-600): # 6 months is around 24 hours * 30 days * 6 = 4320
# Only query when the exchange object reports support for fetchOHLCV
if binance.has['fetchOHLCV']:
# Pause between requests; rateLimit is presumably in milliseconds, hence / 1000
time.sleep (binance.rateLimit / 1000) # time.sleep wants seconds
# the limit from binance is 1000 timesteps
# NOTE(review): the call below is cut off in this view (no closing
# parenthesis); its remaining arguments and the rest of the loop body are not
# shown. `since` is the millisecond timestamp of `hours` hours ago.
ohlcv = binance.fetch_ohlcv(symbol, '1h', since=current_milli_time(hours),
# Simple helper that builds the timestamp of a moment x hours in the past.
import time


def current_milli_time(x: float) -> int:
    """Return the Unix timestamp, in milliseconds, of the moment *x* hours ago.

    Args:
        x: Number of hours to go back from the current time. ``0`` gives the
            current time; a negative value gives a moment in the future.

    Returns:
        Milliseconds since the epoch, rounded to the nearest integer.
    """
    seconds_back = 3600 * x  # one hour is 3600 seconds
    return int(round((time.time() - seconds_back) * 1000))
import ccxt
# Create an instance of the exchange to query, here Binance
binance = ccxt.binance()
# Quick test to verify data access:
# get the 2 most recent hourly candlesticks of the pair 'BTC/USDT'.
# The timeframe is passed explicitly because ccxt's fetch_ohlcv defaults to
# one-minute candles, which would return the last 2 minutes, not 2 hours.
pair = 'BTC/USDT'
binance.fetch_ohlcv(pair, timeframe='1h', limit=2)
# Install the ccxt library (inside a Jupyter notebook, prefix the command with `!`)
pip install ccxt #!pip install ccxt in jupyter