Marcos Dominguez mdominguez2010

## linear_reg.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                mdominguez2010
                / linear_reg.ipynb
            
            
              Last active
              October 13, 2020 02:46
            
              
                Sneaker Data
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## clean.py
import numpy as np
import pandas as pd

import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English
from spacy import displacy, lemmatizer

# Clean text so it's ready for preprocessing

## preprocess.py
def preprocess(clean_corpus):
    """
    Prepare a cleaned speech for downstream text processing.

    Intended pipeline, per the original description: tokenize the speech,
    break it out into sentences, lemmatize, and remove stop words and
    punctuation for each speech.

    NOTE(review): only the setup below (the punctuation string and the
    stop-word collection) is visible in this excerpt. The tokenizing and
    lemmatizing steps, and whatever the function returns, are not shown
    here - confirm against the full source.

    Parameters
    ----------
    clean_corpus
        The text to process (presumably the output of an earlier cleaning
        step - TODO confirm). It is not referenced in the visible portion.
    """

    # Create our list of punctuation marks
    # (ASCII punctuation plus the typographic ’ and ” quote characters)
    punctuations = '!"#$%&\'()’*+,-./:”;<=>?@[\\]^_`{|}~'

    # Create our list of stopwords
    # (spaCy's English STOP_WORDS, reached through the full module path)
    stop_words = spacy.lang.en.stop_words.STOP_WORDS

## get_data.py
def get_data(symbol,
             client_id,
             periodType = 'year',
             n_periods = 20,
             frequencyType = 'daily',
             frequency = 1):
    """
    Yields a dataframe of close price data for the given parameters.

    NOTE(review): the body is not visible in this excerpt, so the parameter
    descriptions below are inferred from the names and defaults only -
    confirm against the full source.

    Parameters
    ----------
    symbol
        Presumably the ticker symbol to fetch - TODO confirm.
    client_id
        Presumably an API client identifier/key - TODO confirm.
    periodType : default 'year'
        Presumably the unit of the look-back period.
    n_periods : default 20
        Presumably the number of ``periodType`` units to fetch.
    frequencyType : default 'daily'
        Presumably the bar size unit.
    frequency : default 1
        Presumably the number of ``frequencyType`` units per bar.
    """
    # Initialize parameters

## calc_return.py
def calc_return(dataframe, lag = 1):
    """
    Add a lagged-close column and a log-return column to ``dataframe``.

    Two columns are written in place:

    * ``'{lag}-day prevClose'`` - the ``'close'`` column shifted down by
      ``lag`` rows (the first ``lag`` rows are NaN).
    * ``'return'`` - the row-to-row difference of the natural log of that
      lagged column.

    NOTE(review): because the return is computed from the lagged column,
    each row's value is the log change between the closes ``lag + 1`` and
    ``lag`` rows earlier - confirm this offset is intended.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'close'`` column. Modified in place.
    lag : int, default 1
        Number of rows to shift by. Must be non-negative; ``0`` copies
        ``'close'`` unchanged.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    ValueError
        If ``lag`` is negative (a negative shift would pull in future closes).
    """
    if lag < 0:
        raise ValueError(f'lag must be non-negative, got {lag}')

    prev_close_col = f'{lag}-day prevClose'
    # shift() pads the first `lag` rows with NaN and, unlike slicing with
    # [:-lag], also behaves correctly for lag == 0.
    dataframe[prev_close_col] = dataframe['close'].shift(lag)
    dataframe['return'] = np.log(dataframe[prev_close_col]).diff()

    return dataframe

## mean_std.py
def mean_std(dataframe, length=20):
    """
    Add a rolling mean and a rolling standard deviation of ``'return'``.

    Two columns are written in place, ``f'sma{length}'`` and
    ``f'std{length}'``, both computed over a window of ``length`` rows.
    Afterwards every row that contains a NaN in any column is dropped in
    place; this removes the leading rows where the window is not yet full,
    and also any other row with a missing value.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'return'`` column. Modified in place.
    length : int, default 20
        Rolling window size in rows.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, returned for consistency with
        ``calc_return`` so calls can be chained.
    """
    # Build the window once and reuse it for both statistics
    returns_window = dataframe['return'].rolling(length)
    dataframe[f'sma{length}'] = returns_window.mean()
    dataframe[f'std{length}'] = returns_window.std()
    # Remove leading NaNs
    dataframe.dropna(inplace=True)

    return dataframe

# Add the rolling mean/std columns (default 20-row window) to the working
# dataframe; mean_std modifies the dataframe in place.
mean_std(dataframe)

## dftest.py
# Run adfuller on the return series with AIC-based lag selection
# (presumably statsmodels' augmented Dickey-Fuller test - the `sm` import is
# not visible here).
dftest = sm.tsa.adfuller(dataframe['return'], autolag='AIC')

# Label the first four entries of the result tuple.
dfoutput = pd.Series(
    dftest[0:4],
    index=[
        'Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observation Used',
    ],
)

# Entry 4 is a mapping; add one labelled row per key.
for key, value in dftest[4].items():
    dfoutput[f'Critical Value ({key})'] = value

# Bare expression: displays the series when run as a notebook cell.
dfoutput

## acf.py
# Plot the autocorrelation of the return series for lags up to 10 on a
# figsize=(12,5) axes (plot_acf is presumably statsmodels' plotting helper -
# its import is not visible here).
fig, ax = plt.subplots(figsize=(12,5))
plot_acf(dataframe['return'], lags=10, ax=ax)
plt.show()

## pacf.py
# Plot the partial autocorrelation of the return series for lags up to 10 on
# a figsize=(12,5) axes (plot_pacf is presumably statsmodels' plotting
# helper - its import is not visible here).
fig, ax = plt.subplots(figsize=(12,5))
plot_pacf(dataframe['return'], lags=10, ax=ax)
plt.show()

## arima.py
# Build model and print summary
# The return series is passed as a plain tuple, with (6,6) as the order
# argument (presumably (AR lags, MA lags) - TODO confirm against the ARMA
# import, which is not visible here).
# NOTE(review): the summary() result below is neither printed nor assigned;
# in a notebook only a cell's final expression is displayed - confirm intent.
ar1 = ARMA(tuple(dataframe['return']), (6,6)).fit()
ar1.summary()

# Generate predictions
# (the model's in-sample fitted values)
preds = ar1.fittedvalues

# Add predictions to our dataframe
# NOTE(review): dataframe.columns[1] selects whichever column happens to be
# second (presumably a price column); this breaks silently if the column
# order changes - consider selecting it by name.
dataframe['predictions'] = dataframe[dataframe.columns[1]] * (1 + preds)
	import numpy as np
	import pandas as pd

	import spacy
	from spacy.lang.en.stop_words import STOP_WORDS
	from spacy.lang.en import English
	from spacy import displacy, lemmatizer

	# Clean text so it's ready for preprocessing
	def preprocess(clean_corpus):
		"""
		Prepare a cleaned speech for downstream text processing.

		Intended pipeline, per the original description: tokenize the speech,
		break it out into sentences, lemmatize, and remove stop words and
		punctuation for each speech.

		NOTE(review): only the setup below (the punctuation string and the
		stop-word collection) is visible in this excerpt. The remaining steps
		and any return value are not shown here - confirm against the full
		source.

		Parameters
		----------
		clean_corpus
			The text to process (presumably the output of an earlier cleaning
			step - TODO confirm). It is not referenced in the visible portion.
		"""

		# Create our list of punctuation marks
		# (ASCII punctuation plus the typographic ’ and ” quote characters;
		# the stray "\|" markdown escape has been removed)
		punctuations = '!"#$%&\'()’*+,-./:”;<=>?@[\\]^_`{|}~'

		# Create our list of stopwords
		stop_words = spacy.lang.en.stop_words.STOP_WORDS
	def get_data(symbol,
			client_id,
			periodType = 'year',
			n_periods = 20,
			frequencyType = 'daily',
			frequency = 1):
		"""
		Yields a dataframe of close price data for the given parameters.

		NOTE(review): the body is not visible in this excerpt, so the parameter
		descriptions below are inferred from the names and defaults only -
		confirm against the full source.

		Parameters
		----------
		symbol
			Presumably the ticker symbol to fetch - TODO confirm.
		client_id
			Presumably an API client identifier/key - TODO confirm.
		periodType : default 'year'
			Presumably the unit of the look-back period.
		n_periods : default 20
			Presumably the number of ``periodType`` units to fetch.
		frequencyType : default 'daily'
			Presumably the bar size unit.
		frequency : default 1
			Presumably the number of ``frequencyType`` units per bar.
		"""
		# Initialize parameters
	def calc_return(dataframe, lag = 1):
		"""
		Add a lagged-close column and a log-return column to ``dataframe``.

		Two columns are written in place:

		* ``'{lag}-day prevClose'`` - the ``'close'`` column shifted down by
		  ``lag`` rows (the first ``lag`` rows are NaN).
		* ``'return'`` - the row-to-row difference of the natural log of that
		  lagged column.

		NOTE(review): because the return is computed from the lagged column,
		each row's value is the log change between the closes ``lag + 1`` and
		``lag`` rows earlier - confirm this offset is intended.

		Parameters
		----------
		dataframe : pandas.DataFrame
			Must contain a ``'close'`` column. Modified in place.
		lag : int, default 1
			Number of rows to shift by. Must be non-negative; ``0`` copies
			``'close'`` unchanged.

		Returns
		-------
		pandas.DataFrame
			The same object that was passed in.

		Raises
		------
		ValueError
			If ``lag`` is negative (a negative shift would pull in future
			closes).
		"""
		if lag < 0:
			raise ValueError(f'lag must be non-negative, got {lag}')

		prev_close_col = f'{lag}-day prevClose'
		# shift() pads the first `lag` rows with NaN and, unlike slicing with
		# [:-lag], also behaves correctly for lag == 0.
		dataframe[prev_close_col] = dataframe['close'].shift(lag)
		dataframe['return'] = np.log(dataframe[prev_close_col]).diff()

		return dataframe
	def mean_std(dataframe, length=20):
		"""
		Add a rolling mean and a rolling standard deviation of ``'return'``.

		Two columns are written in place, ``f'sma{length}'`` and
		``f'std{length}'``, both computed over a window of ``length`` rows.
		Afterwards every row that contains a NaN in any column is dropped in
		place; this removes the leading rows where the window is not yet full,
		and also any other row with a missing value.

		Parameters
		----------
		dataframe : pandas.DataFrame
			Must contain a ``'return'`` column. Modified in place.
		length : int, default 20
			Rolling window size in rows.

		Returns
		-------
		pandas.DataFrame
			The same object that was passed in, so calls can be chained.
		"""
		# Build the window once and reuse it for both statistics
		returns_window = dataframe['return'].rolling(length)
		dataframe[f'sma{length}'] = returns_window.mean()
		dataframe[f'std{length}'] = returns_window.std()
		# Remove leading NaNs
		dataframe.dropna(inplace=True)

		return dataframe

	# Add the rolling mean/std columns (default 20-row window) in place
	mean_std(dataframe)

	# Run adfuller on the return series with AIC-based lag selection
	# (presumably statsmodels' augmented Dickey-Fuller test - the `sm` import
	# is not visible here).
	dftest = sm.tsa.adfuller(dataframe['return'], autolag='AIC')
	# Label the first four entries of the result tuple
	dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observation Used'])
	# Entry 4 is a mapping; add one labelled row per key
	for key, value in dftest[4].items():
		dfoutput['Critical Value ({0})'.format(key)] = value

	# Bare expression: displays the series when run as a notebook cell
	dfoutput
	# Plot the autocorrelation of the return series for lags up to 10 on a
	# figsize=(12,5) axes (plot_acf is presumably statsmodels' plotting helper -
	# its import is not visible here).
	fig, ax = plt.subplots(figsize=(12,5))
	plot_acf(dataframe['return'], lags=10, ax=ax)
	plt.show()
	# Plot the partial autocorrelation of the return series for lags up to 10
	# on a figsize=(12,5) axes (plot_pacf is presumably statsmodels' plotting
	# helper - its import is not visible here).
	fig, ax = plt.subplots(figsize=(12,5))
	plot_pacf(dataframe['return'], lags=10, ax=ax)
	plt.show()
	# Build model and print summary
	# The return series is passed as a plain tuple, with (6,6) as the order
	# argument (presumably (AR lags, MA lags) - TODO confirm against the ARMA
	# import, which is not visible here).
	# NOTE(review): the summary() result below is neither printed nor assigned;
	# in a notebook only a cell's final expression is displayed - confirm intent.
	ar1 = ARMA(tuple(dataframe['return']), (6,6)).fit()
	ar1.summary()

	# Generate predictions
	# (the model's in-sample fitted values)
	preds = ar1.fittedvalues

	# Add predictions to our dataframe
	# NOTE(review): dataframe.columns[1] selects whichever column happens to be
	# second (presumably a price column); this breaks silently if the column
	# order changes - consider selecting it by name.
	dataframe['predictions'] = dataframe[dataframe.columns[1]] * (1 + preds)