Skip to content

Instantly share code, notes, and snippets.

View mdominguez2010's full-sized avatar
🎯
Focusing

Marcos Dominguez mdominguez2010

🎯
Focusing
View GitHub Profile
@mdominguez2010
mdominguez2010 / linear_reg.ipynb
Last active October 13, 2020 02:46
Sneaker Data
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@mdominguez2010
mdominguez2010 / clean.py
Last active November 15, 2020 19:59
NLP_trump_speech
import numpy as np
import pandas as pd
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.en import English
from spacy import displacy, lemmatizer
# Clean text so it's ready for preprocessing
@mdominguez2010
mdominguez2010 / preprocess.py
Created November 15, 2020 19:58
NLP_trump_speech2
def preprocess(clean_corpus):
"""
Takes in a speech, tokenizes and breaks it out into sentences, lemmatizes and removes stop words and punctuation for each speech
"""
# Create our list of punctuation marks
punctuations = '!"#$%&\'()’*+,-./:”;<=>?@[\\]^_`{|}~'
# Create our list of stopwords
stop_words = spacy.lang.en.stop_words.STOP_WORDS
@mdominguez2010
mdominguez2010 / get_data.py
Last active February 16, 2021 17:50
Forecast Google's stock price
def get_data(symbol,
client_id,
periodType = 'year',
n_periods = 20,
frequencyType = 'daily',
frequency = 1):
"""
Yields a dataframe of close price data for the given parameters
"""
# Initialize parameters
@mdominguez2010
mdominguez2010 / calc_return.py
Last active February 16, 2021 17:52
forecast google stock price
def calc_return(dataframe, lag = 1):
    """
    Add the lagged close and the log return over that lag to the dataframe.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a 'close' column. It is modified in place.
    lag : int
        Number of rows to look back for the previous close (default 1).
        Must be non-negative.

    Returns
    -------
    pandas.DataFrame
        The same dataframe object with two added columns:
        '{lag}-day prevClose' (the close shifted down by `lag` rows, NaN in
        the first `lag` rows) and 'return' (log(close / prevClose)).

    Raises
    ------
    ValueError
        If `lag` is negative (a negative shift would pull in future closes).
    """
    if lag < 0:
        raise ValueError(f"lag must be non-negative, got {lag}")

    prev_col = f'{lag}-day prevClose'

    # shift() pads the first `lag` rows with NaN; unlike slicing with [:-lag]
    # it also behaves for lag=0 and for lag >= len(dataframe).
    dataframe[prev_col] = dataframe['close'].shift(lag)

    # The return of a row compares that row's close with its previous close.
    # Differencing the log of the prevClose column alone would yield the
    # return of the lagged series, misaligned with the row's own close.
    dataframe['return'] = np.log(dataframe['close'] / dataframe[prev_col])

    return dataframe
@mdominguez2010
mdominguez2010 / mean_std.py
Created February 16, 2021 17:59
google forecast
def mean_std(dataframe, length=20):
    """
    Add a rolling mean and a rolling standard deviation of the 'return' column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a 'return' column. It is modified in place.
    length : int
        Rolling window size in rows (default 20).

    Returns
    -------
    pandas.DataFrame
        The same dataframe object, with 'sma{length}' and 'std{length}'
        columns added and every row containing a NaN removed.
    """
    # Build the rolling window once and reuse it for both statistics.
    window = dataframe['return'].rolling(length)
    dataframe[f'sma{length}'] = window.mean()
    dataframe[f'std{length}'] = window.std()

    # Remove the leading NaNs produced by the rolling window. Note that
    # dropna() without a subset drops rows with a NaN in ANY column.
    dataframe.dropna(inplace=True)

    # Return the dataframe so the function can be chained like calc_return.
    return dataframe
# Add the rolling mean/std columns to `dataframe` using the default window length of 20
mean_std(dataframe)
@mdominguez2010
mdominguez2010 / dftest.py
Last active February 16, 2021 18:03
google forecast
# Run the Augmented Dickey-Fuller test on the return series, letting AIC pick the lag count
dftest = sm.tsa.adfuller(dataframe['return'], autolag='AIC')

# Label the first four entries of the result tuple
dfoutput = pd.Series(
    dftest[:4],
    index=[
        'Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observation Used',
    ],
)

# Append one labelled row per critical value held in the fifth entry of the result
for level, threshold in dftest[4].items():
    dfoutput[f'Critical Value ({level})'] = threshold

# Bare expression: displays the series when this is the last statement of a notebook cell
dfoutput
@mdominguez2010
mdominguez2010 / acf.py
Created February 16, 2021 18:12
google forecast
# Draw the autocorrelation plot of the return series for the first 10 lags on a 12x5 figure
fig, ax = plt.subplots(figsize=(12, 5))
plot_acf(
    dataframe['return'],
    ax=ax,
    lags=10,
)
plt.show()
@mdominguez2010
mdominguez2010 / pacf.py
Created February 16, 2021 18:15
forecast google
# Draw the partial autocorrelation plot of the return series for the first 10 lags on a 12x5 figure
fig, ax = plt.subplots(figsize=(12, 5))
plot_pacf(
    dataframe['return'],
    ax=ax,
    lags=10,
)
plt.show()
@mdominguez2010
mdominguez2010 / arima.py
Last active February 16, 2021 18:25
google forecast
# Build model and print summary
# NOTE(review): ARMA is presumably statsmodels' ARMA class imported elsewhere — confirm.
ar1 = ARMA(tuple(dataframe['return']), (6,6)).fit()
# summary() only returns the summary object; outside the last line of a
# notebook cell the result would be discarded, so print it explicitly.
print(ar1.summary())

# Generate predictions (in-sample fitted values of the return series)
preds = ar1.fittedvalues

# Add predictions to our dataframe by scaling the second column by (1 + fitted value)
# NOTE(review): dataframe.columns[1] is a positional lookup — presumably the
# previous-close column; verify against the dataframe's actual column order.
dataframe['predictions'] = dataframe[dataframe.columns[1]] * (1 + preds)