Created
June 22, 2015 15:04
-
-
Save StuartGordonReid/0d9745f39a034758e51e to your computer and use it in GitHub Desktop.
Regression Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'Stuart Gordon Reid' | |
import os as os | |
import csv as csv | |
import numpy as np | |
import scipy as spy | |
import sklearn as kit | |
import pandas as pandas | |
import statsmodels.api as sm | |
import matplotlib.pyplot as plot | |
from Quandl import get | |
from statsmodels.sandbox.regression.predstd import wls_prediction_std | |
class StatsModelsSettings:
    """
    Options that control how the statsmodels regression is fitted.

    * exponent:int - power applied to the time axis; 1 gives a straight line, >1 gives a curve
    * confidence:boolean - whether confidence lines should be calculated and plotted
    """

    # Class-level defaults; each instance stores its own values in __init__.
    exponent = 1
    confidence = False

    def __init__(self, exponent=1, confidence=False):
        """
        Construct a new StatsModelsSettings object from the given exponent and confidence flag.
        """
        self.exponent, self.confidence = exponent, confidence
class QuandlSettings:
    """
    Options that describe which slice of a Quandl data-set to download.

    * rows:int - the amount of historical data to extract, counted in [frequency] periods
    * column:int - index of the column in the data-set used for the regression analysis
    * frequency:String - one of ("daily"|"weekly"|"monthly"|"quarterly"|"annual")
    * transformation:String - numerical transformation, one of ("diff"|"rdiff"|"normalize"|"cumul")
    * order:String - ordering of the returned data, one of ("asc"|"desc")
    """

    # Class-level defaults; each instance stores its own values in __init__.
    rows = 0
    column = 1
    frequency = "weekly"
    transformation = "normalize"
    order = "desc"

    def __init__(self, rows, column, frequency="weekly", transformation="normalize", order="desc"):
        """
        Construct a new QuandlSettings object; rows and column are required, the rest are optional.
        """
        self.rows, self.column = rows, column
        self.frequency = frequency
        self.transformation = transformation
        self.order = order
class RegressionAnalysis:
    """
    This class contains the logic for calculating a regression analysis given a Quandl data-set name, a
    QuandlSettings object, and a StatsModelsSettings object. After construction the instance exposes,
    * color - matplotlib colour code used when the analysis is plotted
    * dates - numeric time axis 1..N, one entry per retrieved row
    * prices - values of the selected column, in reversed row order, divided by 100
    * data_set - the Quandl data-set name
    * regression - the fitted statsmodels OLS results object
    * lower / upper - confidence lines, or None when statsmodels_settings.confidence is False
    """

    # Class-level defaults; every instance rebinds these in __init__, so the lists are never shared in practice.
    color = 'r'
    dates = []
    prices = []
    data_set = ""
    regression = None
    upper = None
    lower = None

    def __init__(self, quandl_data_set_name, quandl_settings, statsmodels_settings, color='r'):
        """
        Download the data-set and fit the regression.

        :param quandl_data_set_name: Quandl code of the data-set to download
        :param quandl_settings: QuandlSettings object describing what to download
        :param statsmodels_settings: StatsModelsSettings object describing how to fit
        :param color: matplotlib colour code used when plotting this analysis
        """
        self.color = color
        self.data_set = quandl_data_set_name
        self.dates, self.prices = self.get_quandl_data(self.data_set, quandl_settings)
        # run_ordinary_least_squares returns a 3-tuple only when confidence lines were requested
        if statsmodels_settings.confidence:
            self.regression, self.lower, self.upper = self.run_ordinary_least_squares(
                self.dates, self.prices, statsmodels_settings)
        else:
            self.regression = self.run_ordinary_least_squares(self.dates, self.prices, statsmodels_settings)

    @staticmethod
    def get_quandl_data(quandl_data_set_name, quandl_settings):
        """
        Retrieve the Quandl data-set using the options held by quandl_settings (see the QuandlSettings class)
        and return the pair (dates, prices) used for the regression.

        :return: (numpy array 1..N, list of N values) where N is the number of rows actually returned
        """
        quandl_data_set = get(quandl_data_set_name, rows=quandl_settings.rows, returns="numpy",
                              transformation=quandl_settings.transformation,
                              sort_order=quandl_settings.order, collapse=quandl_settings.frequency)
        print(quandl_data_set)
        return RegressionAnalysis._extract_series(quandl_data_set, quandl_settings.column)

    @staticmethod
    def _extract_series(quandl_data_set, column):
        """
        Build the (dates, prices) pair from a sequence of row tuples (w, x, y, z).

        The time axis and the row indices are derived from the number of rows actually present rather than
        from the number requested, so a short (or unlimited, rows=0) result still yields matching lengths.
        """
        count = len(quandl_data_set)
        quandl_dates = np.arange(1, count + 1)
        # Rows are read last-to-first, which turns the default "desc" order into oldest-first.
        # NOTE(review): the division by 100 is kept from the original; presumably a scale factor - confirm.
        quandl_prices = [quandl_data_set[row][column] / 100 for row in range(count - 1, -1, -1)]
        return quandl_dates, quandl_prices

    @staticmethod
    def _build_regressors(ols_dates, exponent):
        """
        Return the regressor columns (without the constant term) for the given time axis.

        For exponent == 1 this is the single column x; stacking x next to x ** 1 would only duplicate the
        column and make the design matrix perfectly collinear. For any other exponent the columns are
        (x, x ** exponent).
        """
        time_axis = np.asarray(ols_dates)
        if exponent == 1:
            return np.column_stack((time_axis,))
        return np.column_stack((time_axis, time_axis ** exponent))

    @staticmethod
    def run_ordinary_least_squares(ols_dates, ols_data, statsmodels_settings):
        """
        Fit an ordinary-least-squares regression of ols_data on ols_dates and print its summary.

        :param ols_dates: numeric time axis
        :param ols_data: observed values, one per entry of ols_dates
        :param statsmodels_settings: StatsModelsSettings object (exponent and confidence are read)
        :return: the fitted results object, or the tuple (results, lower, upper) when
                 statsmodels_settings.confidence is True
        """
        regressors = RegressionAnalysis._build_regressors(ols_dates, statsmodels_settings.exponent)
        design_matrix = sm.add_constant(regressors)
        statsmodel_regression = sm.OLS(ols_data, design_matrix).fit()
        print(statsmodel_regression.summary())
        if statsmodels_settings.confidence:
            _prstd, lower, upper = wls_prediction_std(statsmodel_regression)
            return statsmodel_regression, lower, upper
        return statsmodel_regression
def plot_regression_line(regression_analyses):
    """
    This global method is a front-end to the MatplotLib library which receives a set of regression analyses
    and plots each one of them onto a single canvas, then shows the figure.

    For every analysis the raw values and the fitted values are drawn in the analysis' colour; the lower and
    upper confidence lines are drawn dashed when they are present (not None).

    :param regression_analyses: iterable of RegressionAnalysis objects
    """
    _fig, ax = plot.subplots()
    data_set_names = []
    # Plot each regression analysis in the set
    for regression_i in regression_analyses:
        ax.plot(regression_i.dates, regression_i.prices, regression_i.color, label="Values " + regression_i.data_set)
        ax.plot(regression_i.dates, regression_i.regression.fittedvalues, regression_i.color + '.',
                label="Regression line " + regression_i.data_set)
        if regression_i.lower is not None:
            ax.plot(regression_i.dates, regression_i.lower, regression_i.color + '--')
        if regression_i.upper is not None:
            ax.plot(regression_i.dates, regression_i.upper, regression_i.color + '--')
        data_set_names.append(regression_i.data_set)
    # Axis labels do not depend on the individual analyses, so they are set once
    plot.xlabel('Time')
    plot.ylabel('Normalized Values')
    # Joining the names avoids the dangling ", " that appending a separator after every name leaves behind
    plot.title('Regression Analysis of ' + ", ".join(data_set_names))
    ax.legend(loc='best')
    plot.grid(True)
    plot.show()
def investing_example():
    """
    This method creates a set of regression analyses based on fundamental trading (revenues) and plots them.
    """
    # b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
    statsmodels_args_inv = StatsModelsSettings(2, False)
    # "annual" is the yearly frequency listed by QuandlSettings; "yearly" is not one of its documented values
    quandl_args_inv = QuandlSettings(5, 1, "annual")
    regressions_inv = [RegressionAnalysis("DMDRN/GOOG_REV_LAST", quandl_args_inv, statsmodels_args_inv, 'b'),
                       RegressionAnalysis("DMDRN/YHOO_REV_LAST", quandl_args_inv, statsmodels_args_inv, 'g'),
                       RegressionAnalysis("DMDRN/AAPL_REV_LAST", quandl_args_inv, statsmodels_args_inv, 'k')]
    plot_regression_line(regressions_inv)
def trading_example():
    """
    Build and plot regression analyses of technical trading data (prices) for two NASDAQ tickers, using a
    straight-line fit with confidence lines over 350 weekly rows.
    """
    # b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
    series_and_colors = (("GOOG/NASDAQ_GOOG", 'b'),
                         ("GOOG/NASDAQ_YHOO", 'g'))
    ols_options = StatsModelsSettings(1, True)
    quandl_options = QuandlSettings(350, 4, "weekly")
    analyses = [RegressionAnalysis(series, quandl_options, ols_options, shade)
                for series, shade in series_and_colors]
    plot_regression_line(analyses)
def economics_example():
    """
    This method creates a set of regression analyses based on the GDPs of the BRICS nations and plots them.
    """
    # b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
    statsmodels_args = StatsModelsSettings(1, False)
    # "annual" is the yearly frequency listed by QuandlSettings; "yearly" is not one of its documented values
    quandl_args_prices = QuandlSettings(15, 1, "annual")
    # South Africa, China, Brazil, India, Russia
    regressions = [RegressionAnalysis("WORLDBANK/ZAF_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'b'),
                   RegressionAnalysis("WORLDBANK/CHN_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'g'),
                   RegressionAnalysis("WORLDBANK/BRA_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'k'),
                   RegressionAnalysis("WORLDBANK/IND_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'm'),
                   RegressionAnalysis("WORLDBANK/RUS_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'c')]
    plot_regression_line(regressions)
if __name__ == "__main__":
    # Script entry point: when the file is executed directly, run the trading (price-based) example.
    trading_example()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment