Skip to content

Instantly share code, notes, and snippets.

@iamanvesh
Last active October 7, 2020 07:57
Show Gist options
  • Save iamanvesh/597fd5f07752e5c944581afd954d306f to your computer and use it in GitHub Desktop.
Save iamanvesh/597fd5f07752e5c944581afd954d306f to your computer and use it in GitHub Desktop.
Quick and dirty way to build a continuous contract from monthly contracts that can be used to back-test. Rollover to the new contract at the given expiry hour/minute in your backtest.
import os
import csv
import pandas as pd
from datetime import datetime
# Three-letter contract month codes mapped to their calendar month number.
# Insertion order (JAN..DEC) matters: the dict is iterated to walk the
# contracts chronologically.
months = {
    code: number
    for number, code in enumerate(
        (
            'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
            'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
        ),
        start=1,
    )
}
# Contract years to scan, in chronological order (2011 through 2020 inclusive).
years = list(range(2011, 2021))
# strptime formats tried, in this order, when parsing a "date time" string.
known_patterns = [
    '%d/%m/%Y %H:%M:%S',  # day/month/year with seconds
    '%Y%m%d %H:%M',       # compact date, minute resolution
    '%Y%m%d %H:%M:%S',    # compact date with seconds
]
def validate_pattern(date, pattern):
    '''
    Tell whether a date string can be parsed with a strptime pattern.
    Args:
        date: str -- the text to test.
        pattern: str -- a datetime.strptime format string.
    Return:
        True when strptime accepts the pair, False when it raises ValueError.
    '''
    try:
        datetime.strptime(date, pattern)
    except ValueError:
        return False
    else:
        return True
def parse_date(date, patterns=None):
    '''
    Parse the date string with the first matching format.
    Args:
        date: str -- the "date time" text to parse.
        patterns: optional iterable of strptime format strings to try, in
            order. Defaults to the module-level known_patterns.
    Returns:
        parsed_date: datetime -- a timestamp whose seconds equal 59 is
            snapped back to second 0 of the same minute.
    Raises:
        RuntimeError: if none of the patterns matches the string.
    '''
    if patterns is None:
        patterns = known_patterns
    for pattern in patterns:
        # Parse once and treat ValueError as "try the next pattern" instead
        # of validating first and then parsing the same string again.
        try:
            parsed_date = datetime.strptime(date, pattern)
        except ValueError:
            continue
        if parsed_date.second == 59:
            # Some contracts are labeled with right edge (mm:59)
            parsed_date = parsed_date.replace(second=0)
        return parsed_date
    raise RuntimeError(f'Unable to parse {date}')
def read_file(file_name):
    '''
    Load one contract's csv file into a DataFrame.
    The file is read without a header row; its columns are taken to be
    Date, Time, Open, High, Low, Close, Volume, OI. The Date and Time
    columns are merged into a single parsed Date column.
    Args:
        file_name Name of the file.
    Return:
        A Pandas DataFrame with the contents (the Time column is dropped)
    '''
    column_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'OI']
    frame = pd.read_csv(file_name, names=column_names, sep=',')

    def _pad_nine(time_text):
        # Times beginning with '9' get a leading zero (e.g. 9:15 -> 09:15).
        if time_text[0] == '9':
            return f'0{time_text}'
        return time_text

    frame['Time'] = frame['Time'].apply(_pad_nine)
    # Glue "<date> <time>" together row by row, then parse the result.
    frame['Date'] = frame[['Date', 'Time']].apply(
        lambda row: ' '.join([str(cell) for cell in row]),
        axis=1,
    )
    frame['Date'] = frame['Date'].apply(parse_date)
    return frame.drop('Time', axis=1)
def get_file_name(month, year, symbol):
    '''
    Returns the path of the contract's file based on the name and the year.
    Assumes that the contracts dir tree is of the following format
    ./symbol
        2011/
            {symbol}JAN11.csv
    Args:
        month: str -- three-letter month code, e.g. 'JAN'.
        year: int -- four-digit contract year, e.g. 2011.
        symbol: str -- tradingsymbol of the instrument, e.g. 'NIFTY'.
    Returns:
        path: str
    '''
    # Always emit a two-digit, zero-padded year suffix so that 2009 becomes
    # '09' (matching the JAN11 naming scheme) rather than '9'.
    return f'./{symbol}/{year}/{symbol}{month}{year % 100:02d}.csv'
def build_cont_data(expiry_hour, expiry_minute, cont_file_name, symbol):
    '''
    Iterates over all the contract files and builds a continuous futures contract.
    For each contract, the expiry is the date of its last row with the hour and
    minute replaced by the given values. Rows up to and including that expiry,
    and strictly after the previous contract's expiry, are kept. The stitched
    result is written to cont_file_name as csv, indexed by Date, with floats
    formatted to two decimals.
    Args:
        expiry_hour: int -- the hour at which the current contract should be rolled over.
        expiry_minute: int -- the minute at which the current contract should be rolled over.
        cont_file_name: str -- path of the file to store the continuous data.
        symbol: str -- tradingsymbol of the instrument.
    Returns:
        None
    '''
    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'OI']
    prev_expiry = None
    # Collect the per-contract slices and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every call.
    segments = []
    for year in years:
        for month in months:
            print(month, year)
            file_name = get_file_name(month, year, symbol)
            if not os.path.exists(file_name):
                # A missing contract file ends the scan of this year; the
                # loop resumes with the first month of the next year.
                break
            curr_contract = read_file(file_name)
            curr_expiry = curr_contract.iloc[-1]['Date']
            curr_expiry = curr_expiry.replace(hour=expiry_hour, minute=expiry_minute)
            keep = curr_contract['Date'] <= curr_expiry
            if prev_expiry is not None:
                # Drop bars already covered by the previous contract.
                keep = keep & (curr_contract['Date'] > prev_expiry)
            segments.append(curr_contract[keep])
            prev_expiry = curr_expiry
    if segments:
        cont_data = pd.concat(segments)
    else:
        # No contract files found: still write a header-only csv.
        cont_data = pd.DataFrame(columns=columns)
    cont_data.set_index('Date').to_csv(cont_file_name, float_format='%.2f')
if __name__ == '__main__':
    # Stitch the NIFTY contracts, rolling over at 15:14 on each expiry day.
    build_cont_data(
        expiry_hour=15,
        expiry_minute=14,
        cont_file_name='./nifty_continuous.csv',
        symbol='NIFTY',
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment