# talaikis/ptv.py

## ptv.py
"""
This idea was first used for Bitcoin. The data file format is MT4's.
Similar strategies are used by market makers.
It's definitely better than any moving-average-based strategy.
ETH donations accepted: 0x007F11363140F2edE5f6d4F1a19A352861e013e0
"""

from os.path import join

from pandas import read_csv, to_datetime, DataFrame, read_pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity
from scipy.integrate import quad
from numpy import exp, log, where, column_stack, arange, polyfit
from scipy import stats
import statsmodels.api as sm


# Instrument name; together with `period` it forms the input CSV name
# ("<symbol><period>.csv") and the predictions pickle name.
symbol = "BTCUSD"
# Bar period label; in this file it is only used to build file names.
period = "240"
# When true (and strategy_only is false) main() runs ptv() for every
# start in range(100, 800) instead of the single `start` below.
optimize = False
# Number of preceding closes fitted per prediction and first predicted row
# position in ptv(); also part of the pickle file name.
start = 50
# Width of each price bucket integrated in ptv().
step = 100
# Lower and upper bounds of the price grid: range(price_from, price_to, step).
price_from = 100
price_to = 7000
# Not referenced by any code visible in this file.
percentile = 50
# When true, main() skips ptv() and runs strategy() on an existing pickle.
strategy_only = True
# When true, strategy() calls histo() on the loaded frame.
show_histo = False
# When true, strategy() plots logvar together with the threshold line.
show_sig = True
# When true, strategy() plots PREDS against CLOSE.
comparison = True
# strategy() sets the signal to 1 where logvar <= threshold.
threshold = 5
# Multiplier applied to the strategy returns in strategy().
leverage = 6


def read():
    """
    Loads the MT4-style price file "<symbol><period>.csv" into a DataFrame.

    The file is read without a header row as seven comma-separated columns:
    Date, Time, OPEN, HIGH, LOW, CLOSE, VOLUME.

    Date and Time are combined into the index. Indexing by Date alone (as the
    code did before) gives every intraday bar of a day the same timestamp,
    which makes the index ambiguous and the sort order within a day undefined.

    Returns:
        DataFrame sorted ascending by its "DATE_TIME" datetime index, with
        columns Time, OPEN, HIGH, LOW, CLOSE, VOLUME.
    """
    df = read_csv(
        "{0}{1}.csv".format(symbol, period),
        sep=',',
        header=None,
        names=['Date', 'Time', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME'],
        # Keep both parts as text so they can be joined before parsing.
        dtype={'Date': str, 'Time': str},
    )

    stamps = to_datetime(df['Date'].str.strip() + " " + df['Time'].str.strip())
    # Date used to be consumed as the index column; drop it so the returned
    # columns stay the same as before (Time, OPEN, HIGH, LOW, CLOSE, VOLUME).
    del df['Date']
    df.index = stamps.values
    df.index.name = "DATE_TIME"

    # Sort on real timestamps rather than on the raw date strings.
    df.sort_index(axis=0, ascending=True, inplace=True)

    return df


def histo(df):
    """
    Plots a histogram of CLOSE percentage changes with a vertical line at 0
    (red) and at the mean change (blue).

    The "ret" column is computed on a copy, so the caller's frame is left
    untouched (previously the column was written into the argument).

    Args:
        df: DataFrame with a "CLOSE" column.
    """
    # assign() returns a new frame; rows with a NaN in any column are dropped,
    # exactly as the whole-frame dropna() did before.
    data = df.assign(ret=df["CLOSE"].pct_change()).dropna()
    returns = data["ret"]

    plt.hist(returns, bins=100)
    plt.axvline(returns.mean(), lw=2, color='b')
    plt.axvline(0.0, lw=2, color='r')
    plt.show()


def ptv(df, symbol, start, end, price_from, price_to, step):
    """
    Builds a kernel-density price prediction per bar and pickles the result.

    For every row position d in [start, end) a Gaussian KernelDensity
    (bandwidth 0.75) is fitted on the `start` CLOSE values preceding d. The
    density is integrated over each bucket [i, i + step) for i in
    range(price_from, price_to, step); the prediction is the sum of
    bucket probability * bucket midpoint.

    The output frame (columns PREDS, CLOSE and RETURNS = CLOSE.diff()) is
    written to "preds_<symbol>_<period>_<start>.pckl", where `period` is the
    module-level setting.

    Args:
        df: DataFrame with a "CLOSE" column.
        symbol: Instrument name used in the pickle file name.
        start: Window length and first predicted row position.
        end: Row position at which to stop (exclusive).
        price_from: Lower bound of the price grid.
        price_to: Upper bound of the price grid (exclusive for range()).
        step: Width of each price bucket.

    Returns:
        None. Progress is printed for every bar.
    """
    closes = df["CLOSE"]
    total = len(df.index)
    # The price buckets are identical for every bar, so build them once.
    buckets = [(low, low + step) for low in range(price_from, price_to, step)]
    preds = []

    for d in range(start, end):
        print("{0}/{1}".format(d, total))
        window = closes.iloc[(d - start):d].values.reshape(-1, 1)
        kd = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(window)

        def density(x, kd=kd):
            # score_samples needs a 2-D (n_samples, n_features) array and
            # returns log-density; quad needs a plain float back.
            return float(exp(kd.score_samples([[x]])[0]))

        expected = sum(
            quad(density, low, high)[0] * (low + high) / 2
            for low, high in buckets
        )

        # Positional lookup; the removed `.ix` indexer was used here before.
        preds.append([expected, closes.iloc[d]])

    df2 = DataFrame(preds, index=df.iloc[start:end].index)
    df2.columns = ["PREDS", "CLOSE"]
    df2["RETURNS"] = df2["CLOSE"].diff()
    df2.to_pickle("preds_{0}_{1}_{2}.pckl".format(symbol, period, start))


def mlog(x):
    """
    Returns the natural logarithm of x (thin wrapper around numpy's log).
    """
    natural_log = log(x)
    return natural_log


def strategy(end):
    """
    Backtests the threshold strategy on the predictions pickled by ptv().

    Reads "preds_<symbol>_<period>_<start>.pckl" (module-level settings), then:
      * logvar = log((CLOSE - PREDS) ** 2);
      * sig = 1 where logvar <= threshold, else 0;
      * trades = 1 on bars where sig switches from 0 to 1;
      * returns = previous bar's sig * CLOSE.diff() * leverage.

    Prints the trade count and the std/mean of instrument and strategy
    returns, shows the plots enabled by show_histo, show_sig and comparison,
    and finally plots cumulative strategy returns against the instrument.

    Args:
        end: Unused; kept so existing callers keep working.
    """
    # NOTE: read_pickle can execute arbitrary code from the file; only load
    # pickles produced by ptv().
    df = read_pickle("preds_{0}_{1}_{2}.pckl".format(symbol, period, start))

    if show_histo:
        histo(df=df)

    df["diff"] = df["CLOSE"] - df["PREDS"]
    df["var"] = df["diff"] * df["diff"]
    df["logvar"] = log(df["var"])

    if show_sig:
        df["logvar"].plot()
        plt.axhline(threshold)
        plt.show()

    df = df.dropna()

    df["sig"] = where(df["logvar"] <= threshold, 1, 0)
    # `(sig == 1) & (sig.shift()) == 0` parsed as `((sig == 1) & sig.shift()) == 0`
    # because & binds tighter than ==; compare the shifted signal explicitly.
    previous_sig = df["sig"].shift()
    df["trades"] = where((df["sig"] == 1) & (previous_sig == 0), 1, 0)

    print("Trades {0}".format(df["trades"].sum()))
    price_change = df["CLOSE"].diff()
    df["returns"] = previous_sig * price_change * leverage

    print("Instrument STD {0}".format(price_change.std()))
    print("Strategy STD {0}".format(df["returns"].std()))
    print("Instrument MEAN {0}".format(price_change.mean()))
    print("Strategy MEAN {0}".format(df["returns"].mean()))

    if comparison:
        plt.plot(df.PREDS, color='g', lw=3)
        # The pickled frame already starts at row `start`; slicing it again
        # with iloc[start:end] dropped the first `start` rows of CLOSE.
        plt.plot(df["CLOSE"], color='g')
        plt.ylabel("Symbol: {0} | Period: {1}".format(symbol, start))
        plt.show()

    df["returns"].cumsum().plot()
    price_change.cumsum().plot()
    plt.show()


def main(strategy_only):
    """
    Loads the price data and runs the pipeline.

    When strategy_only is false, ptv() runs first: once for the module-level
    `start`, or for every start in range(100, 800) when `optimize` is set.
    strategy() then runs in every case.

    NOTE(review): strategy() always loads the pickle named after the
    module-level `start`, whichever starts the optimize sweep produced.

    Args:
        strategy_only: Skip the prediction step and only run strategy().
    """
    prices = read()
    end = len(prices.index)

    if not strategy_only:
        window_starts = range(100, 800) if optimize else [start]
        for window_start in window_starts:
            ptv(df=prices, symbol=symbol, start=window_start, end=end,
                price_from=price_from, price_to=price_to, step=step)

    strategy(end=end)


# Script entry point: there is no __main__ guard, so this runs whenever the
# module is executed or imported.
main(strategy_only=strategy_only)
	"""
	This idea was first used for Bitcoin. The data file format is MT4's.
	Similar strategies are used by market makers.
	It's definitely better than any moving-average-based strategy.
	ETH donations accepted: 0x007F11363140F2edE5f6d4F1a19A352861e013e0
	"""

	from os.path import join

	from pandas import read_csv, to_datetime, DataFrame, read_pickle
	import matplotlib.pyplot as plt
	from sklearn.neighbors import KernelDensity
	from scipy.integrate import quad
	from numpy import exp, log, where, column_stack, arange, polyfit
	from scipy import stats
	import statsmodels.api as sm


	# Instrument name; together with `period` it forms the input CSV name
	# ("<symbol><period>.csv") and the predictions pickle name.
	symbol = "BTCUSD"
	# Bar period label; in this file it is only used to build file names.
	period = "240"
	# When true (and strategy_only is false) main() runs ptv() for every
	# start in range(100, 800) instead of the single `start` below.
	optimize = False
	# Number of preceding closes fitted per prediction and first predicted row
	# position in ptv(); also part of the pickle file name.
	start = 50
	# Width of each price bucket integrated in ptv().
	step = 100
	# Lower and upper bounds of the price grid: range(price_from, price_to, step).
	price_from = 100
	price_to = 7000
	# Not referenced by any code visible in this file.
	percentile = 50
	# When true, main() skips ptv() and runs strategy() on an existing pickle.
	strategy_only = True
	# When true, strategy() calls histo() on the loaded frame.
	show_histo = False
	# When true, strategy() plots logvar together with the threshold line.
	show_sig = True
	# When true, strategy() plots PREDS against CLOSE.
	comparison = True
	# strategy() sets the signal to 1 where logvar <= threshold.
	threshold = 5
	# Multiplier applied to the strategy returns in strategy().
	leverage = 6


	def read():
		"""
		Loads the MT4-style price file "<symbol><period>.csv" into a DataFrame.

		The file is read without a header row as seven comma-separated columns:
		Date, Time, OPEN, HIGH, LOW, CLOSE, VOLUME.

		The body indentation, which had been flattened, is restored. Date and
		Time are combined into the index: indexing by Date alone gives every
		intraday bar of a day the same timestamp.

		Returns:
			DataFrame sorted ascending by its "DATE_TIME" datetime index, with
			columns Time, OPEN, HIGH, LOW, CLOSE, VOLUME.
		"""
		df = read_csv(
			"{0}{1}.csv".format(symbol, period),
			sep=',',
			header=None,
			names=['Date', 'Time', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME'],
			# Keep both parts as text so they can be joined before parsing.
			dtype={'Date': str, 'Time': str},
		)

		stamps = to_datetime(df['Date'].str.strip() + " " + df['Time'].str.strip())
		# Date used to be consumed as the index column; drop it so the returned
		# columns stay the same as before.
		del df['Date']
		df.index = stamps.values
		df.index.name = "DATE_TIME"

		# Sort on real timestamps rather than on the raw date strings.
		df.sort_index(axis=0, ascending=True, inplace=True)

		return df


	def histo(df):
		"""
		Plots a histogram of CLOSE percentage changes with a vertical line at 0
		(red) and at the mean change (blue).

		The body indentation, which had been flattened, is restored, and the
		"ret" column is computed on a copy so the caller's frame is not modified.

		Args:
			df: DataFrame with a "CLOSE" column.
		"""
		# assign() returns a new frame; rows with a NaN in any column are
		# dropped, as the whole-frame dropna() did before.
		data = df.assign(ret=df["CLOSE"].pct_change()).dropna()
		returns = data["ret"]

		plt.hist(returns, bins=100)
		plt.axvline(returns.mean(), lw=2, color='b')
		plt.axvline(0.0, lw=2, color='r')
		plt.show()


	def ptv(df, symbol, start, end, price_from, price_to, step):
		"""
		Builds a kernel-density price prediction per bar and pickles the result.

		For every row position d in [start, end) a Gaussian KernelDensity
		(bandwidth 0.75) is fitted on the `start` CLOSE values preceding d. The
		density is integrated over each bucket [i, i + step) for i in
		range(price_from, price_to, step); the prediction is the sum of
		bucket probability * bucket midpoint.

		The output frame (columns PREDS, CLOSE and RETURNS = CLOSE.diff()) is
		written to "preds_<symbol>_<period>_<start>.pckl", where `period` is the
		module-level setting. The flattened loop nesting is restored here.

		Args:
			df: DataFrame with a "CLOSE" column.
			symbol: Instrument name used in the pickle file name.
			start: Window length and first predicted row position.
			end: Row position at which to stop (exclusive).
			price_from: Lower bound of the price grid.
			price_to: Upper bound of the price grid (exclusive for range()).
			step: Width of each price bucket.

		Returns:
			None. Progress is printed for every bar.
		"""
		closes = df["CLOSE"]
		total = len(df.index)
		# The price buckets are identical for every bar, so build them once.
		buckets = [(low, low + step) for low in range(price_from, price_to, step)]
		preds = []

		for d in range(start, end):
			print("{0}/{1}".format(d, total))
			window = closes.iloc[(d - start):d].values.reshape(-1, 1)
			kd = KernelDensity(kernel='gaussian', bandwidth=0.75).fit(window)

			def density(x, kd=kd):
				# score_samples needs a 2-D (n_samples, n_features) array and
				# returns log-density; quad needs a plain float back.
				return float(exp(kd.score_samples([[x]])[0]))

			expected = sum(
				quad(density, low, high)[0] * (low + high) / 2
				for low, high in buckets
			)

			# Positional lookup; the removed `.ix` indexer was used here before.
			preds.append([expected, closes.iloc[d]])

		df2 = DataFrame(preds, index=df.iloc[start:end].index)
		df2.columns = ["PREDS", "CLOSE"]
		df2["RETURNS"] = df2["CLOSE"].diff()
		df2.to_pickle("preds_{0}_{1}_{2}.pckl".format(symbol, period, start))


	def mlog(x):
		"""
		Returns the natural logarithm of x (thin wrapper around numpy's log).

		The body is indented under the def again; it had been flattened to the
		same level, which is a syntax error.
		"""
		return log(x)


	def strategy(end):
		"""
		Backtests the threshold strategy on the predictions pickled by ptv().

		Reads "preds_<symbol>_<period>_<start>.pckl" (module-level settings), then:
		  * logvar = log((CLOSE - PREDS) ** 2);
		  * sig = 1 where logvar <= threshold, else 0;
		  * trades = 1 on bars where sig switches from 0 to 1;
		  * returns = previous bar's sig * CLOSE.diff() * leverage.

		Prints the trade count and the std/mean of instrument and strategy
		returns, shows the plots enabled by show_histo, show_sig and comparison,
		and finally plots cumulative strategy returns against the instrument.
		The flattened block indentation is restored here.

		Args:
			end: Unused; kept so existing callers keep working.
		"""
		# NOTE: read_pickle can execute arbitrary code from the file; only load
		# pickles produced by ptv().
		df = read_pickle("preds_{0}_{1}_{2}.pckl".format(symbol, period, start))

		if show_histo:
			histo(df=df)

		df["diff"] = df["CLOSE"] - df["PREDS"]
		df["var"] = df["diff"] * df["diff"]
		df["logvar"] = log(df["var"])

		if show_sig:
			df["logvar"].plot()
			plt.axhline(threshold)
			plt.show()

		df = df.dropna()

		df["sig"] = where(df["logvar"] <= threshold, 1, 0)
		# `(sig == 1) & (sig.shift()) == 0` parsed as `((sig == 1) & sig.shift()) == 0`
		# because & binds tighter than ==; compare the shifted signal explicitly.
		previous_sig = df["sig"].shift()
		df["trades"] = where((df["sig"] == 1) & (previous_sig == 0), 1, 0)

		print("Trades {0}".format(df["trades"].sum()))
		price_change = df["CLOSE"].diff()
		df["returns"] = previous_sig * price_change * leverage

		print("Instrument STD {0}".format(price_change.std()))
		print("Strategy STD {0}".format(df["returns"].std()))
		print("Instrument MEAN {0}".format(price_change.mean()))
		print("Strategy MEAN {0}".format(df["returns"].mean()))

		if comparison:
			plt.plot(df.PREDS, color='g', lw=3)
			# The pickled frame already starts at row `start`; slicing it again
			# with iloc[start:end] dropped the first `start` rows of CLOSE.
			plt.plot(df["CLOSE"], color='g')
			# The label previously contained a stray backslash before the pipe.
			plt.ylabel("Symbol: {0} | Period: {1}".format(symbol, start))
			plt.show()

		df["returns"].cumsum().plot()
		price_change.cumsum().plot()
		plt.show()


	def main(strategy_only):
		"""
		Loads the price data and runs the pipeline.

		When strategy_only is false, ptv() runs first: once for the module-level
		`start`, or for every start in range(100, 800) when `optimize` is set.
		strategy() then runs in every case. The if/else nesting, which had been
		flattened to one level, is restored.

		NOTE(review): strategy() always loads the pickle named after the
		module-level `start`, whichever starts the optimize sweep produced.

		Args:
			strategy_only: Skip the prediction step and only run strategy().
		"""
		df = read()
		end = len(df.index)

		if not strategy_only:
			if optimize:
				for k in range(100, 800):
					ptv(df=df, symbol=symbol, start=k, end=end,
						price_from=price_from, price_to=price_to, step=step)
			else:
				ptv(df=df, symbol=symbol, start=start, end=end,
					price_from=price_from, price_to=price_to, step=step)

		strategy(end=end)


	# Script entry point: there is no __main__ guard around this call.
	main(strategy_only=strategy_only)