Ekrekr/basic_svm_algo.py

## basic_svm_algo.py
# -*- coding: utf-8 -*-
"""Confidence Based Support Vector Machine

This algorithm implements a confidence based support vector machine, as designed
by Debarghya Das (website http://debarghyadas.com/).

To clear up any ambiguity with regard to financial terms, my use of the word
"Margin" throughout this code relates to the length of the sequence of data
being experimented upon, and "Polarity" indicates the direction of price
movement within each sequence item: +1 indicates up and -1 indicates down.

The algorithmic complexity here ends up being O(n^2) squared, where n is the
number of time steps in the longest period of time processed.

License:
    (c) 2019 - Elias Kassell Raymond
    This code is licenced under the Gnu General Public License v3, as stipulated
    by https://www.gnu.org/licenses/gpl-3.0.en.html

Prerequisites:
    Python3:        https://www.python.org/download/releases/3.0/
    zipline:        https://www.zipline.io/
    Quandl API Key: https://www.quandl.com/

Example:
    Ingesting with Quandl is only necessary once, but needs to be repeated
    every time you want to experiment on fresh data::

        $ QUANDL_API_KEY=[your API key] zipline ingest -b quandl
        $ zipline run -f algo.py --start 2017-1-1 --end 2018-1-1 -o out.pickle

TODO:
    * Reduce algorithmic complexity.

.. _Google Python Style Guide:
   http://google.github.io/styleguide/pyguide.html

"""

import datetime
from zipline.api import order_target, record, symbol
from sklearn import svm

#: list of lists: Polarity sequences collected so far, one entry appended per
# processed bar by handle_data. Used as the training samples for the support
# vector machine.
SEQS = []

#: list: Training label for each entry of SEQS (the last polarity of the
# window that entry was built from). Index i here corresponds to index i in
# SEQS.
POLS = []

#: int: Progress counter. NOTE(review): not referenced anywhere in the code
# visible here; handle_data tracks progress through context.i instead.
COUNT = 0

def initialize(context):
    """Prepares the zipline context before any bar is processed.

    Args:
        context (zipline.algorithm.TradingAlgorithm):
            Zipline persistent namespace. Receives the attributes ``asset``
            and ``i``.

    Returns:
        None
    """
    # The single asset this algorithm trades.
    context.asset = symbol('AAPL')

    # Bar counter, read and advanced by handle_data.
    context.i = 0

def handle_data(context, data):
    """Processes financial data at a particular time step.

    For every bar, once enough history exists, the last ``margin_size + 1``
    daily prices are converted into ``margin_size`` polarities (+1 when the
    price rose from one day to the next, -1 otherwise). The first
    ``margin_size - 1`` polarities are stored in ``SEQS`` as a training
    sample and the final polarity is stored in ``POLS`` as its label. After a
    second warm-up period a classifier is refit on everything collected so
    far and used to predict the next polarity from the most recent
    ``margin_size - 1`` polarities.

    Args:
        context (zipline.algorithm.TradingAlgorithm):
            Zipline persistent namespace. Reads ``asset`` and ``portfolio``;
            reads and increments ``i``.
        data    (zipline._protocol.BarData):
            Methods to access spot value, history windows and other utility
            methods.

    Returns:
        None. Side effects: appends to the module-level ``SEQS`` and ``POLS``,
        places an order, records values and occasionally prints progress.
    """
    margin_size = 30

    # Skip bars until a full window of margin_size + 1 prices is available.
    # The counter is advanced exactly once per bar so that both warm-up
    # thresholds below are measured in bars.
    context.i += 1
    if context.i < margin_size + 1:
        return

    #: pandas.core.series.Series: Prices of the last margin_size + 1 days,
    # oldest first.
    history = data.history(context.asset, 'price',
                           bar_count=margin_size + 1, frequency="1d")
    prices = history.tolist()

    #: list: 1 if the price increased from the previous day, -1 otherwise.
    polarity = [1 if later > earlier else -1
                for earlier, later in zip(prices, prices[1:])]

    # The sample holds the moves that preceded the final one and the label is
    # that final move. Keeping the label out of the sample prevents the model
    # from simply reading the answer from its own input.
    SEQS.append(polarity[:-1])
    POLS.append(polarity[-1])

    # Wait another month so there's just about enough data to make a proper
    # classification.
    if context.i < margin_size * 2 + 1:
        return

    # SVC.fit raises when every label belongs to the same class; keep
    # collecting data until both directions have been observed.
    if len(set(POLS)) < 2:
        return

    # Fit a support vector classifier on everything collected so far.
    # NOTE: SVC() is used with scikit-learn's default kernel (documented as
    # RBF), so this is not a linear classifier.
    classifier = svm.SVC()
    classifier.fit(SEQS, POLS)

    # Predict the next move from the most recent margin_size - 1 moves.
    prediction = classifier.predict([polarity[1:]])[0]

    # Hold as many whole shares as the portfolio is worth when a rise is
    # predicted, otherwise hold nothing. order_target sets the total
    # position, so the target is sized from the whole portfolio value at the
    # current price rather than from the remaining cash.
    # (Obviously there are flaws in simulating like this, but from an abstract
    # view it is fine).
    current_price = data.current(context.asset, 'price')
    to_order = 0
    if prediction == 1 and current_price > 0:
        to_order = int(context.portfolio.portfolio_value // current_price)

    order_target(context.asset, to_order)

    # Save values for later inspection
    record(price=current_price, polarityPrediction=prediction)

    if context.i % 100 == 0:
        print("Progress update:", context.i, datetime.datetime.now())
	# -- coding: utf-8 --
	"""Confidence Based Support Vector Machine

	This algorithm implements a confidence based support vector machine, as designed
	by Debarghya Das (website http://debarghyadas.com/).

	To clear up any ambiguity with regard to financial terms, my use of the word
	"Margin" throughout this code relates to the length of the sequence of data
	being experimented upon, and "Polarity" indicates the direction of price
	movement within each sequence item: +1 indicates up and -1 indicates down.

	The algorithmic complexity here ends up being O(n^2) squared, where n is the
	number of time steps in the longest period of time processed.

	License:
	(c) 2019 - Elias Kassell Raymond
	This code is licenced under the Gnu General Public License v3, as stipulated
	by https://www.gnu.org/licenses/gpl-3.0.en.html

	Prerequisites:
	Python3: https://www.python.org/download/releases/3.0/
	zipline: https://www.zipline.io/
	Quandl API Key: https://www.quandl.com/

	Example:
	Ingesting with Quandl is only necessary once, but needs to be repeated
	every time you want to experiment on fresh data::

	$ QUANDL_API_KEY=[your API key] zipline ingest -b quandl
	$ zipline run -f algo.py --start 2017-1-1 --end 2018-1-1 -o out.pickle

	TODO:
	* Reduce algorithmic complexity.

	.. _Google Python Style Guide:
	http://google.github.io/styleguide/pyguide.html

	"""

	import datetime
	from zipline.api import order_target, record, symbol
	from sklearn import svm

	#: list of lists: Polarity sequences collected so far, one entry appended per
	# processed bar by handle_data. Used as the training samples for the support
	# vector machine.
	SEQS = []

	#: list: Training label for each entry of SEQS (the last polarity of the
	# window that entry was built from). Index i here corresponds to index i in
	# SEQS.
	POLS = []

	#: int: Progress counter. NOTE(review): not referenced anywhere in the code
	# visible here; handle_data tracks progress through context.i instead.
	COUNT = 0

	def initialize(context):
		"""Prepares the zipline context before any bar is processed.

		Args:
			context (zipline.algorithm.TradingAlgorithm):
				Zipline persistent namespace. Receives the attributes
				``asset`` and ``i``.

		Returns:
			None
		"""
		# The single asset this algorithm trades.
		context.asset = symbol('AAPL')

		# Bar counter, read and advanced by handle_data.
		context.i = 0

	def handle_data(context, data):
		"""Processes financial data at a particular time step.

		For every bar, once enough history exists, the last ``margin_size + 1``
		daily prices are converted into ``margin_size`` polarities (+1 when the
		price rose from one day to the next, -1 otherwise). The first
		``margin_size - 1`` polarities are stored in ``SEQS`` as a training
		sample and the final polarity is stored in ``POLS`` as its label. After
		a second warm-up period a classifier is refit on everything collected
		so far and used to predict the next polarity from the most recent
		``margin_size - 1`` polarities.

		Args:
			context (zipline.algorithm.TradingAlgorithm):
				Zipline persistent namespace. Reads ``asset`` and
				``portfolio``; reads and increments ``i``.
			data    (zipline._protocol.BarData):
				Methods to access spot value, history windows and other
				utility methods.

		Returns:
			None. Side effects: appends to the module-level ``SEQS`` and
			``POLS``, places an order, records values and occasionally prints
			progress.
		"""
		margin_size = 30

		# Skip bars until a full window of margin_size + 1 prices is available.
		# The counter is advanced exactly once per bar so that both warm-up
		# thresholds below are measured in bars.
		context.i += 1
		if context.i < margin_size + 1:
			return

		#: pandas.core.series.Series: Prices of the last margin_size + 1 days,
		# oldest first.
		history = data.history(
			context.asset, 'price',
			bar_count=margin_size + 1, frequency="1d")
		prices = history.tolist()

		#: list: 1 if the price increased from the previous day, -1 otherwise.
		polarity = [
			1 if later > earlier else -1
			for earlier, later in zip(prices, prices[1:])
		]

		# The sample holds the moves that preceded the final one and the label
		# is that final move. Keeping the label out of the sample prevents the
		# model from simply reading the answer from its own input.
		SEQS.append(polarity[:-1])
		POLS.append(polarity[-1])

		# Wait another month so there's just about enough data to make a proper
		# classification.
		if context.i < margin_size * 2 + 1:
			return

		# SVC.fit raises when every label belongs to the same class; keep
		# collecting data until both directions have been observed.
		if len(set(POLS)) < 2:
			return

		# Fit a support vector classifier on everything collected so far.
		# NOTE: SVC() is used with scikit-learn's default kernel (documented as
		# RBF), so this is not a linear classifier.
		classifier = svm.SVC()
		classifier.fit(SEQS, POLS)

		# Predict the next move from the most recent margin_size - 1 moves.
		prediction = classifier.predict([polarity[1:]])[0]

		# Hold as many whole shares as the portfolio is worth when a rise is
		# predicted, otherwise hold nothing. order_target sets the total
		# position, so the target is sized from the whole portfolio value at
		# the current price rather than from the remaining cash.
		# (Obviously there are flaws in simulating like this, but from an
		# abstract view it is fine).
		current_price = data.current(context.asset, 'price')
		to_order = 0
		if prediction == 1 and current_price > 0:
			to_order = int(context.portfolio.portfolio_value // current_price)

		order_target(context.asset, to_order)

		# Save values for later inspection
		record(price=current_price, polarityPrediction=prediction)

		if context.i % 100 == 0:
			print("Progress update:", context.i, datetime.datetime.now())