Loading
      
  Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
    
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import numpy as np | |
| import pandas as pd | |
| import spacy | |
| from spacy.lang.en.stop_words import STOP_WORDS | |
| from spacy.lang.en import English | |
| from spacy import displacy, lemmatizer | |
| # Clean text so it's ready for preprocessing | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def preprocess(clean_corpus): | |
| """ | |
| Takes in a speech, tokenizes and breaks it out into sentences, lemmatizes and removes stop words and punctuation for each speech | |
| """ | |
| # Create our list of punctuation marks | |
| punctuations = '!"#$%&\'()’*+,-./:”;<=>?@[\\]^_`{|}~' | |
| # Create our list of stopwords | |
| stop_words = spacy.lang.en.stop_words.STOP_WORDS | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def get_data(symbol, | |
| client_id, | |
| periodType = 'year', | |
| n_periods = 20, | |
| frequencyType = 'daily', | |
| frequency = 1): | |
| """ | |
| Yields a dataframe of close price data for the given parameters | |
| """ | |
| # Initialize parameters | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def calc_return(dataframe, lag = 1): | |
| """ | |
| Adds a column of the previous close to the dataframe. Lag is a user-input parameter. | |
| """ | |
| prevClose = [x for x in dataframe['close'][:-lag]] | |
| prevClose = [np.nan for i in range(lag)] + prevClose | |
| dataframe[f'{lag}-day prevClose'] = prevClose | |
| dataframe['return'] = np.log(dataframe[f'{lag}-day prevClose']).diff() | |
| return dataframe | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def mean_std(dataframe, length=20): | |
| """ | |
| Adds 2 columns to our dataframe: A rolling mean and standard deviations of user-defined lengths | |
| """ | |
| dataframe[f'sma{length}'] = dataframe['return'].rolling(length).mean() | |
| dataframe[f'std{length}'] = dataframe['return'].rolling(length).std() | |
| # Remove leading NaNs | |
| dataframe.dropna(inplace=True) | |
| mean_std(dataframe) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | dftest = sm.tsa.adfuller(dataframe['return'], autolag='AIC') | |
| dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observation Used']) | |
| for key, value in dftest[4].items(): | |
| dfoutput['Critical Value ({0})'.format(key)] = value | |
| dfoutput | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | fig, ax = plt.subplots(figsize=(12,5)) | |
| plot_acf(dataframe['return'], lags=10, ax=ax) | |
| plt.show() | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | fig, ax = plt.subplots(figsize=(12,5)) | |
| plot_pacf(dataframe['return'], lags=10, ax=ax) | |
| plt.show() | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Build model and print summary | |
| ar1 = ARMA(tuple(dataframe['return']), (6,6)).fit() | |
| ar1.summary() | |
| # Generate predictions | |
| preds = ar1.fittedvalues | |
| # Add predictions to our dataframe | |
| dataframe['predictions'] = dataframe[dataframe.columns[1]] * (1 + preds) | 
OlderNewer