@n-lavrenko
Last active May 22, 2020 14:45
Python script that reads .csv files containing historical bar and tick data (in separate files), automatically detects winter weeks from the data itself, shifts the timezone, removes weekend rows, subtracts one hour during winter weeks, and writes the updated data to new files.
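For reference, the input files are expected in the column layout defined by bars_cols_names / ticks_cols_names below, with dates as %Y%m%d and times as %H:%M:%S. The values in these sample rows are made up; only the shape matters:

20200102,00:00:00,0.70132,0.70145,0.70121,0.70139,87,0,2   (bar row: date, time, open, high, low, close, tick_vol, vol, spread)
20200102,00:00:00,0.70132,0.70135                          (tick row: date, time, bid, ask)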
# I use this script to make Dukascopy historical data identical to the data of the FxClub forex broker.
# That lets me run deep historical optimization in MetaTrader 5 and use the results in real trading on FxClub.
# You can edit the variables below (and the code itself) to shape the Dukascopy historical data as you need.
# Good luck.
import time

import pandas as pd

symbol = 'AUDUSD'
# Raw strings avoid accidental backslash escapes in Windows paths.
path_to_bars_file = rf'D:\TickstoryDownloads\{symbol}\{symbol}_mt5_bars.csv'
path_to_bars_edited_file = rf'D:\TickstoryDownloads\{symbol}\{symbol}_bars_ducas_clean_UTC+3.csv'
path_to_tick_file = rf'D:\TickstoryDownloads\{symbol}\{symbol}_mt5_ticks.csv'
path_to_ticks_edited_file = rf'D:\TickstoryDownloads\{symbol}\{symbol}_ticks_ducas_clean_UTC+3.csv'

bars_cols_names = ['date', 'time', 'open', 'high', 'low', 'close', 'tick_vol', 'vol', 'spread']
bars_dtype = {'date': object, 'open': str, 'high': str, 'low': str, 'close': str, 'tick_vol': str, 'vol': str}
ticks_cols_names = ['date', 'time', 'bid', 'ask']
ticks_dtype = {'date': object, 'bid': str, 'ask': str}

years_count = 14
bars_chunk_size = 100000
ticks_chunk_size = 1000000
increase_hours_timezone = 3
is_print_technical_info = False
def get_reader(path, cols, d_type, chunk_size):
    # Stream the CSV in chunks so files of many gigabytes never have to fit in RAM.
    return pd.read_csv(path, dtype=d_type, iterator=True, sep=',', header=None, names=cols,
                       usecols=range(0, len(cols)), chunksize=chunk_size)
def get_winter_weeks():
    # Weeks that still contain weekend bars after the +3h shift are treated as
    # "winter" weeks; they get an extra -1h correction in read_edit_write().
    weeks_to_change = []
    reader = get_reader(path_to_bars_file, bars_cols_names, bars_dtype, bars_chunk_size)
    for chunk in reader:
        chunk['date_time'] = pd.to_datetime(chunk['date'] + chunk['time'], format='%Y%m%d%H:%M:%S')
        chunk['date_time'] = chunk['date_time'] + pd.Timedelta(hours=increase_hours_timezone)
        chunk['dayofweek'] = chunk['date_time'].dt.dayofweek
        chunk['is_day_off'] = chunk['dayofweek'] > 4
        # .dt.week was removed in pandas 2.0; isocalendar().week is the replacement.
        chunk['week'] = chunk['date_time'].dt.isocalendar().week
        chunk['year_week'] = chunk['date_time'].dt.year.astype('str') + '/' + chunk['week'].astype('str')
        unique = chunk.loc[chunk['is_day_off'], 'year_week'].unique()
        weeks_to_change.extend(unique)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    result = list(dict.fromkeys(weeks_to_change))
    if is_print_technical_info:
        print(result)
    return result
def date_exclude_creator():
    # Build per-year [start, end] boundaries for the New Year's Day and
    # Christmas windows that will be cut from the data.
    start_year = 2007
    new_year_date_start = '01-01 00:00:00'
    new_year_date_end = '01-01 23:59:59'
    christmas_date_start = '12-24 22:00:00'
    christmas_date_end = '12-26 02:59:59'
    _all_new_years_start = []
    _all_new_years_end = []
    _all_christmases_start = []
    _all_christmases_end = []
    for d in range(years_count):
        _all_new_years_start.append(str(start_year + d) + '-' + new_year_date_start)
        _all_new_years_end.append(str(start_year + d) + '-' + new_year_date_end)
        _all_christmases_start.append(str(start_year + d) + '-' + christmas_date_start)
        _all_christmases_end.append(str(start_year + d) + '-' + christmas_date_end)
    if is_print_technical_info:
        print(_all_new_years_start, _all_new_years_end, _all_christmases_start, _all_christmases_end)
    return _all_new_years_start, _all_new_years_end, _all_christmases_start, _all_christmases_end
def filter_dates(chunk):
    # Drop rows that fall inside any of the yearly New Year / Christmas windows.
    for index in range(years_count):
        y_s = pd.to_datetime(all_new_years_start[index])
        y_e = pd.to_datetime(all_new_years_end[index])
        x_s = pd.to_datetime(all_christmases_start[index])
        x_e = pd.to_datetime(all_christmases_end[index])
        mask_year = (chunk['date_time'] >= y_s) & (chunk['date_time'] <= y_e)
        mask_xmas = (chunk['date_time'] >= x_s) & (chunk['date_time'] <= x_e)
        chunk = chunk[~(mask_year | mask_xmas)]
    return chunk
def read_edit_write(is_bars):
    if is_bars:
        reader = get_reader(path_to_bars_file, bars_cols_names, bars_dtype, bars_chunk_size)
        cols_names = bars_cols_names
        path_to_write = path_to_bars_edited_file
    else:
        reader = get_reader(path_to_tick_file, ticks_cols_names, ticks_dtype, ticks_chunk_size)
        cols_names = ticks_cols_names
        path_to_write = path_to_ticks_edited_file
    # newline='' stops Python from translating '\n' into '\r\n' on Windows,
    # which would otherwise double the line endings written by to_csv.
    with open(path_to_write, 'w', newline='') as out_file:
        for chunk in reader:
            chunk['date_time'] = pd.to_datetime(chunk['date'] + chunk['time'], format='%Y%m%d%H:%M:%S')
            chunk['date_time'] = chunk['date_time'] + pd.Timedelta(hours=increase_hours_timezone)
            chunk['dayofweek'] = chunk['date_time'].dt.dayofweek
            chunk['week'] = chunk['date_time'].dt.isocalendar().week
            chunk['year_week'] = chunk['date_time'].dt.year.astype('str') + '/' + chunk['week'].astype('str')
            # Subtract one extra hour in winter weeks; other rows become NaT here
            # and keep their shifted time via fillna below.
            winter = chunk['year_week'].isin(unique_weeks)
            chunk['updated_date_time'] = chunk.loc[winter, 'date_time'] - pd.Timedelta(hours=1)
            chunk['updated_date_time'] = chunk['updated_date_time'].fillna(chunk['date_time'])
            # Drop weekend rows (dayofweek 5 = Saturday, 6 = Sunday).
            chunk = chunk[chunk['dayofweek'] < 5]
            # Drop the New Year's Day and Christmas windows; .copy() avoids
            # SettingWithCopyWarning on the assignments below.
            chunk = filter_dates(chunk).copy()
            chunk['date'] = chunk['updated_date_time'].dt.strftime('%Y%m%d')
            chunk['time'] = chunk['updated_date_time'].dt.strftime('%H:%M:%S')
            # lineterminator was named line_terminator before pandas 2.0.
            chunk[cols_names].to_csv(out_file, sep=',', header=False, lineterminator='\n', index=False)
def edit_history():
    start_time = time.perf_counter()
    read_edit_write(True)
    print(f'Bars of {symbol} are done! Time spent: {(time.perf_counter() - start_time) / 60:0.2f} minutes')
    start_time = time.perf_counter()
    read_edit_write(False)
    print(f'Ticks of {symbol} are done! Time spent: {(time.perf_counter() - start_time) / 3600:0.2f} hours')


print(f'Editing bars and ticks of {symbol} has started.')
(all_new_years_start, all_new_years_end, all_christmases_start, all_christmases_end) = date_exclude_creator()
unique_weeks = get_winter_weeks()
edit_history()
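A quick sanity check worth running after the script finishes (not part of the original script; it simply re-reads the edited bars file in chunks and asserts that the weekend filter worked):

import pandas as pd

def check_no_weekends(path, cols):
    # Re-read the edited file chunk by chunk and verify no Saturday/Sunday rows remain.
    for chunk in pd.read_csv(path, sep=',', header=None, names=cols, dtype=str, chunksize=100000):
        dt = pd.to_datetime(chunk['date'] + chunk['time'], format='%Y%m%d%H:%M:%S')
        assert (dt.dt.dayofweek < 5).all(), 'weekend rows found'
    print('OK: no weekend rows in', path)

check_no_weekends(path_to_bars_edited_file, bars_cols_names)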
@n-lavrenko (Author)
This script uses the pandas library and can read huge files: for example, I read one file that is 11 GB.
P.S. I also cut some data on 01.01 and on 25-26 December of each year (see the date_exclude_creator() function).
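For anyone wondering how an 11 GB file fits in memory: with chunksize=, pandas only materializes one chunk of rows at a time, so file size is effectively unbounded. A minimal standalone sketch of that pattern (the filename here is made up):

import pandas as pd

total_rows = 0
# Each iteration yields an ordinary DataFrame of at most 1,000,000 rows;
# the rest of the file stays on disk until its chunk is requested.
for chunk in pd.read_csv('huge_ticks.csv', sep=',', header=None,
                         names=['date', 'time', 'bid', 'ask'], dtype=str, chunksize=1000000):
    total_rows += len(chunk)
print(total_rows, 'rows processed without loading the whole file')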
