Created
October 25, 2018 10:14
-
-
Save NMZivkovic/a04d813557ae31d0890036162fb68eb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage: analyse_data.py --company=<company> | |
""" | |
import warnings | |
import logging | |
import itertools | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from hmmlearn.hmm import GaussianHMM | |
from sklearn.model_selection import train_test_split | |
from tqdm import tqdm | |
from docopt import docopt | |
# Parse the command line against the usage text in the module docstring;
# every other docopt option is left at its default.
args = docopt(__doc__)

# Silence all warnings (added to hide the ones emitted by hmmlearn)
warnings.filterwarnings("ignore")

# Switch matplotlib to the ggplot style for nicer-looking plots
plt.style.use('ggplot')
class StockPredictor(object):
    """Predict daily closing prices of a stock with a Gaussian HMM.

    The price history is read from ``data/company_data/<company>.csv``, which
    must provide ``open``, ``close``, ``high`` and ``low`` columns (plus
    ``date`` when plotting). The rows are split, in file order and without
    shuffling, into a training part used by :meth:`fit` and a test part used
    by the ``predict_*`` methods.

    Each day is described by three features, all relative to the open price:
    the open-to-close change, the open-to-high rise and the open-to-low drop.
    A prediction is made by scoring every candidate feature triple from a
    fixed grid, appended to the features of the preceding days, and keeping
    the candidate the model finds most likely.
    """

    def __init__(self, company, test_size=0.33,
                 n_hidden_states=4, n_latency_days=10,
                 n_steps_frac_change=50, n_steps_frac_high=10,
                 n_steps_frac_low=10):
        """Load the data for ``company`` and prepare the model.

        Args:
            company: Name of the CSV file (without extension) to load.
            test_size: Passed to ``train_test_split``; share of rows kept
                for testing.
            n_hidden_states: Number of hidden states of the Gaussian HMM.
            n_latency_days: How many preceding test days are used as context
                for a prediction.
            n_steps_frac_change: Grid size for the close-price change.
            n_steps_frac_high: Grid size for the high-price rise.
            n_steps_frac_low: Grid size for the low-price drop.
        """
        self._init_logger()

        self.company = company
        self.n_latency_days = n_latency_days

        self.hmm = GaussianHMM(n_components=n_hidden_states)

        self._split_train_test_data(test_size)

        self._compute_all_possible_outcomes(
            n_steps_frac_change, n_steps_frac_high, n_steps_frac_low)

    def _init_logger(self):
        """Set up the module logger with a stream handler at DEBUG level."""
        self._logger = logging.getLogger(__name__)
        # The logger is shared by every instance in the process; attaching a
        # new handler per instance would print each message several times.
        if not self._logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
            handler.setFormatter(formatter)
            self._logger.addHandler(handler)
        self._logger.setLevel(logging.DEBUG)

    def _split_train_test_data(self, test_size):
        """Read the company CSV and split it into train and test frames."""
        data = pd.read_csv(
            'data/company_data/{company}.csv'.format(company=self.company))
        # shuffle=False keeps the file order, so the test rows are the ones
        # that follow the training rows in the file.
        _train_data, test_data = train_test_split(
            data, test_size=test_size, shuffle=False)

        self._train_data = _train_data
        self._test_data = test_data

    @staticmethod
    def _extract_features(data):
        """Return an ``(n_rows, 3)`` array of per-day fractional moves.

        Columns are, in order: ``(close - open) / open``,
        ``(high - open) / open`` and ``(open - low) / open``.
        """
        open_price = np.array(data['open'])
        close_price = np.array(data['close'])
        high_price = np.array(data['high'])
        low_price = np.array(data['low'])

        # Fractional changes of the close, high and low prices relative to
        # the open price are used as the features.
        frac_change = (close_price - open_price) / open_price
        frac_high = (high_price - open_price) / open_price
        frac_low = (open_price - low_price) / open_price

        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        """Fit the HMM on the features of the training data."""
        self._logger.info('>>> Extracting Features')
        feature_vector = StockPredictor._extract_features(self._train_data)
        self._logger.info('Features extraction Completed <<<')

        self.hmm.fit(feature_vector)

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        """Build the grid of candidate ``(change, high, low)`` triples.

        The change ranges over [-0.1, 0.1], the high and low fractions over
        [0, 0.1]; the grid is the Cartesian product of the three ranges.
        """
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.1, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.1, n_steps_frac_low)

        self._possible_outcomes = np.array(list(itertools.product(
            frac_change_range, frac_high_range, frac_low_range)))

    def _get_most_probable_outcome(self, day_index):
        """Return the candidate triple the model scores highest for a day.

        Args:
            day_index: Position of the day in the test data.

        Returns:
            The row of the candidate grid with the highest ``hmm.score``
            when appended to the features of the up to ``n_latency_days``
            test days immediately before ``day_index``.
        """
        window_start = max(0, day_index - self.n_latency_days)
        window_stop = max(0, day_index)
        # Slice start:stop (stop exclusive) so the window holds the days
        # strictly before day_index, including the most recent one. The
        # slice is empty for the first test day.
        previous_data = self._test_data.iloc[window_start:window_stop]
        previous_data_features = StockPredictor._extract_features(
            previous_data)

        outcome_score = [
            self.hmm.score(
                np.vstack((previous_data_features, possible_outcome)))
            for possible_outcome in self._possible_outcomes
        ]

        return self._possible_outcomes[np.argmax(outcome_score)]

    def predict_close_price(self, day_index):
        """Predict the close price of the test day at ``day_index``.

        The day's actual open price is scaled by the predicted fractional
        change.
        """
        open_price = self._test_data.iloc[day_index]['open']
        predicted_frac_change, _, _ = self._get_most_probable_outcome(
            day_index)
        return open_price * (1 + predicted_frac_change)

    def predict_close_prices_for_days(self, days, with_plot=False):
        """Predict close prices for the first ``days`` test days.

        Args:
            days: Number of leading test days to predict.
            with_plot: When true, plot actual against predicted close prices
                over the ``date`` column and show the figure.

        Returns:
            List of predicted close prices, one per day.
        """
        predicted_close_prices = [
            self.predict_close_price(day_index)
            for day_index in tqdm(range(days))
        ]

        if with_plot:
            test_data = self._test_data.iloc[:days]
            dates = np.array(test_data['date'], dtype="datetime64[ms]")
            actual_close_prices = test_data['close']

            fig = plt.figure()

            axes = fig.add_subplot(111)
            axes.plot(dates, actual_close_prices, 'bo-', label="actual")
            axes.plot(dates, predicted_close_prices, 'r+-', label="predicted")
            axes.set_title('{company}'.format(company=self.company))

            fig.autofmt_xdate()

            plt.legend()
            plt.show()

        return predicted_close_prices
# Script driver: build a predictor for the company given on the command line,
# train it, then predict and plot the first 500 days of the test data.
stock_predictor = StockPredictor(args['--company'])
stock_predictor.fit()
stock_predictor.predict_close_prices_for_days(days=500, with_plot=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment