Skip to content

Instantly share code, notes, and snippets.

@StuartGordonReid
Created June 22, 2015 15:04
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save StuartGordonReid/0d9745f39a034758e51e to your computer and use it in GitHub Desktop.
Save StuartGordonReid/0d9745f39a034758e51e to your computer and use it in GitHub Desktop.
Regression Analysis
__author__ = 'Stuart Gordon Reid'
import os as os
import csv as csv
import numpy as np
import scipy as spy
import sklearn as kit
import pandas as pandas
import statsmodels.api as sm
import matplotlib.pyplot as plot
from Quandl import get
from statsmodels.sandbox.regression.predstd import wls_prediction_std
class StatsModelsSettings(object):
    """
    This class contains settings for the statsmodels package, settings include,
    * exponent:int - when equal to one this is a straight line, when >1 this is a curve
    * confidence:boolean - specifies whether confidence lines should be calculated and plotted
    """
    # Class-level defaults; __init__ always shadows them with per-instance values.
    exponent = 1
    confidence = False

    def __init__(self, exponent=1, confidence=False):
        """
        This initialization method constructs a new StatsModelsSettings object

        :param exponent: power stored for the regression step (defaults to 1)
        :param confidence: True when confidence lines are wanted (defaults to False)
        """
        self.exponent = exponent
        self.confidence = confidence

    def __repr__(self):
        """
        Return a constructor-like description of the settings, useful when debugging
        """
        return "{0}(exponent={1!r}, confidence={2!r})".format(
            type(self).__name__, self.exponent, self.confidence)
class QuandlSettings(object):
    """
    This class contains settings for the quandl integration package, settings include,
    * rows:int - specifies the amount of historical data to extract in [frequency]
    * column:int - specifies the column in the data-set to use for the regression analysis
    * frequency:String - select between ("daily"|"weekly"|"monthly"|"quarterly"|"annual")
    * transformation:String - select the numerical transformation ("diff"|"rdiff"|"normalize"|"cumul")
    * order:String - select order of data between ("asc"|"desc")
    """
    # Class-level defaults; __init__ always shadows them with per-instance values.
    rows = 0
    column = 1
    frequency = "weekly"
    transformation = "normalize"
    order = "desc"

    def __init__(self, rows, column, frequency="weekly", transformation="normalize", order="desc"):
        """
        This initialization method constructs a new QuandlSettings object

        :param rows: number of historical observations to request
        :param column: index of the data-set column used for the regression
        :param frequency: sampling frequency, see the class documentation for the options
        :param transformation: numerical transformation, see the class documentation for the options
        :param order: ordering of the returned data, "asc" or "desc"
        """
        self.rows = rows
        self.column = column
        self.frequency = frequency
        self.transformation = transformation
        self.order = order

    def __repr__(self):
        """
        Return a constructor-like description of the settings, useful when debugging
        """
        return "{0}(rows={1!r}, column={2!r}, frequency={3!r}, transformation={4!r}, order={5!r})".format(
            type(self).__name__, self.rows, self.column, self.frequency, self.transformation, self.order)
class RegressionAnalysis(object):
    """
    This class contains the logic for calculating the regression analysis given a Quandl data-set name, a
    QuandlSettings object, and a StatsModelsSettings object. The fitted regression (and, when requested, the lower
    and upper confidence lines) are stored on the instance.
    """
    # Class-level defaults; __init__ rebinds every one of these on the instance, so the
    # list objects below are never mutated or shared between analyses.
    color = 'r'
    dates = []
    prices = []
    data_set = ""
    regression = None
    upper = None
    lower = None

    def __init__(self, quandl_data_set_name, quandl_settings, statsmodels_settings, color='r'):
        """
        This initialization method constructs a new RegressionAnalysis object. Constructing the object downloads the
        data-set through get_quandl_data and fits the regression immediately.

        :param quandl_data_set_name: Quandl code of the data-set to analyse
        :param quandl_settings: QuandlSettings object describing what to download
        :param statsmodels_settings: StatsModelsSettings object describing how to fit
        :param color: matplotlib colour character used when this analysis is plotted
        """
        self.color = color
        self.data_set = quandl_data_set_name
        self.dates, self.prices = self.get_quandl_data(self.data_set, quandl_settings)
        # run_ordinary_least_squares returns a 3-tuple only when confidence lines were requested,
        # so the unpacking has to follow the same flag.
        if statsmodels_settings.confidence:
            self.regression, self.lower, self.upper = self.run_ordinary_least_squares(self.dates, self.prices,
                                                                                      statsmodels_settings)
        else:
            self.regression = self.run_ordinary_least_squares(self.dates, self.prices, statsmodels_settings)

    @staticmethod
    def get_quandl_data(quandl_data_set_name, quandl_settings):
        """
        This method retrieves the quandl data set given the settings specified in the quandl_settings object. For more
        information about these settings see documentation from the QuandlSettings class

        :return: tuple (dates, prices) as produced by _extract_series
        """
        quandl_data_set = get(quandl_data_set_name, rows=quandl_settings.rows, returns="numpy",
                              transformation=quandl_settings.transformation,
                              sort_order=quandl_settings.order, collapse=quandl_settings.frequency)
        print(quandl_data_set)
        return RegressionAnalysis._extract_series(quandl_data_set, quandl_settings.column)

    @staticmethod
    def _extract_series(records, column):
        """
        This helper pulls one column out of a sequence of records and returns it in reversed record order, scaled
        down by 100, together with a matching 1..n index that is used as the time axis.

        The length is taken from the records actually received rather than from the number of rows requested, so a
        data-set that is shorter than requested no longer raises an IndexError and dates / prices always line up.

        :param records: sequence of indexable records (for example the numpy record array returned by Quandl)
        :param column: position of the wanted value inside each record
        :return: tuple (dates, prices); dates is a numpy array 1..n, prices is a list of n values
        """
        count = len(records)
        dates = np.arange(1, count + 1, 1)
        # NOTE(review): the reversal presumably turns newest-first ("desc") data into chronological
        # order; with order="asc" it would do the opposite - confirm against callers.
        prices = [record[column] / 100.0 for record in records[::-1]]
        return dates, prices

    @staticmethod
    def run_ordinary_least_squares(ols_dates, ols_data, statsmodels_settings):
        """
        This method receives the dates and prices of a Quandl data-set as well as settings for the StatsModels package,
        it then calculates the regression line and / or the confidence lines and returns the objects

        :param ols_dates: numeric time index (the independent variable)
        :param ols_data: observed values (the dependent variable)
        :param statsmodels_settings: StatsModelsSettings object supplying exponent and confidence
        :return: the fitted results object, or (results, lower, upper) when confidence is True
        """
        time_axis = np.asarray(ols_dates)
        exponent = statsmodels_settings.exponent
        if exponent == 1:
            # dates ** 1 is the dates column again; stacking both would give a design matrix
            # with two identical columns, so the straight-line case keeps a single regressor.
            regressors = time_axis
        else:
            regressors = np.column_stack((time_axis, time_axis ** exponent))
        design_matrix = sm.add_constant(regressors)
        statsmodel_regression = sm.OLS(ols_data, design_matrix).fit()
        print(statsmodel_regression.summary())
        if statsmodels_settings.confidence:
            # The first value returned (the prediction standard error) is not needed by callers.
            _prstd, lower, upper = wls_prediction_std(statsmodel_regression)
            return statsmodel_regression, lower, upper
        return statsmodel_regression
def plot_regression_line(regression_analyses):
    """
    This global method is a front-end to the MatplotLib library which receives a set of regression analyses and plots
    each one of them onto the canvas.

    :param regression_analyses: iterable of RegressionAnalysis objects; each one contributes its values, its fitted
    regression line and, when present, its lower / upper confidence lines in its own colour
    """
    data_set_names = []
    _fig, ax = plot.subplots()
    # Plot each regression analysis in the set
    for regression_i in regression_analyses:
        ax.plot(regression_i.dates, regression_i.prices, regression_i.color, label="Values " + regression_i.data_set)
        ax.plot(regression_i.dates, regression_i.regression.fittedvalues, regression_i.color + '.',
                label="Regression line " + regression_i.data_set)
        # Confidence lines are only present when the analysis was run with confidence enabled
        if regression_i.lower is not None:
            ax.plot(regression_i.dates, regression_i.lower, regression_i.color + '--')
        if regression_i.upper is not None:
            ax.plot(regression_i.dates, regression_i.upper, regression_i.color + '--')
        data_set_names.append(regression_i.data_set)
    # The axis labels do not depend on the individual analyses, so they are set once
    plot.xlabel('Time')
    plot.ylabel('Normalized Values')
    # Joining the names avoids the dangling ", " that appending a separator after every name leaves behind
    plot.title('Regression Analysis of ' + ", ".join(data_set_names))
    ax.legend(loc='best')
    plot.grid(True)
    plot.show()
def investing_example():
    """
    This method creates a set of regression analyses based on fundamental trading (revenues)
    """
    # b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
    statsmodels_args_inv = StatsModelsSettings(2, False)
    # "annual" is the frequency name listed in the QuandlSettings documentation; "yearly" is not one of the options
    quandl_args_inv = QuandlSettings(5, 1, "annual")
    regressions_inv = [RegressionAnalysis("DMDRN/GOOG_REV_LAST", quandl_args_inv, statsmodels_args_inv, 'b'),
                       RegressionAnalysis("DMDRN/YHOO_REV_LAST", quandl_args_inv, statsmodels_args_inv, 'g'),
                       RegressionAnalysis("DMDRN/AAPL_REV_LAST", quandl_args_inv, statsmodels_args_inv, 'k')]
    plot_regression_line(regressions_inv)
def trading_example():
    """
    Build and plot the regression analyses for the technical trading example (weekly prices, with confidence lines)
    """
    # b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
    fit_settings = StatsModelsSettings(1, True)
    download_settings = QuandlSettings(350, 4, "weekly")
    # (data-set code, plot colour) pairs, analysed in the order listed
    series = (("GOOG/NASDAQ_GOOG", 'b'),
              ("GOOG/NASDAQ_YHOO", 'g'))
    analyses = [RegressionAnalysis(code, download_settings, fit_settings, colour) for code, colour in series]
    plot_regression_line(analyses)
def economics_example():
    """
    This method creates a set of regression analyses based on the GDPs of the BRICS nations
    """
    # b: blue, g: green, r: red, c: cyan, m: magenta, y: yellow, k: black, w: white
    statsmodels_args = StatsModelsSettings(1, False)
    # "annual" is the frequency name listed in the QuandlSettings documentation; "yearly" is not one of the options
    quandl_args_prices = QuandlSettings(15, 1, "annual")
    # South Africa, China, Brazil, India, Russia
    regressions = [RegressionAnalysis("WORLDBANK/ZAF_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'b'),
                   RegressionAnalysis("WORLDBANK/CHN_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'g'),
                   RegressionAnalysis("WORLDBANK/BRA_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'k'),
                   RegressionAnalysis("WORLDBANK/IND_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'm'),
                   RegressionAnalysis("WORLDBANK/RUS_NY_GDP_MKTP_KN", quandl_args_prices, statsmodels_args, 'c')]
    plot_regression_line(regressions)
if __name__ == "__main__":
    # Script entry point: runs the technical trading regression example
    trading_example()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment