Skip to content

Instantly share code, notes, and snippets.

View mberrien-fitzsimons's full-sized avatar
:electron:

Markisha Berrien-Fitzsimons mberrien-fitzsimons

:electron:
View GitHub Profile
family_members = ['Misha', 'Kisha', 'Drew']
# Bug fix: the original called `family_member.lower()` — the name
# `family_member` (singular) was never defined, and `.lower()` is a str
# method, not a list method.  Lowercase each name individually instead.
family_members_lower = [name.lower() for name in family_members]
# read in libraries
import os
import sys
import pandas as pd
import requests
from iexfinance.stocks import Stock
from iexfinance.refdata import get_symbols
# This block of code allows the secrets module in the project's conf
# folder to be imported from a Jupyter notebook: the project root (one
# level up from the notebook's working directory) is appended to the
# module search path.  The duplicate `root_dir` assignment was removed.
root_dir = os.path.join(os.getcwd(), '..')
sys.path.append(root_dir)
from conf import secrets
# Parameters for the Alpha Vantage CSV download.
function = 'TIME_SERIES_DAILY_ADJUSTED'
symbol = 'MSFT'

# Import data from the '01_raw' data folder into the processing notebook.
msft_proc = pd.read_csv('../data/01_raw/msft_raw.csv', parse_dates=['timestamp'])

# Quick look at the dataset.
msft_proc.head()

# Derive the day of the week from the parsed timestamp column.
msft_proc['day_of_week'] = msft_proc['timestamp'].dt.day_name()

# One-hot encode the day of the week.
dummies = pd.get_dummies(msft_proc['day_of_week'])

# Drop the original day-of-week column from the dataframe.
msft_proc.drop(columns=['day_of_week'], inplace=True)

# Bug fix: the comment promised to "add two dataframes together" but the
# concat was missing, so the dummy columns were created and then discarded.
msft_proc = pd.concat([msft_proc, dummies], axis=1)

# Read the processed dataframe back in from the processed data folder.
msft_model_df = pd.read_csv('../data/03_processed/msft_proc.csv')
# Import the formula-based OLS interface and pickle for serialization.
# Bug fix: `pickle` was used below but never imported.
from statsmodels.formula.api import ols
import pickle

# Define the problem: regress trading volume on day-of-week dummies.
outcome = 'volume'
x_cols = ['Friday', 'Monday', 'Thursday', 'Tuesday', 'Wednesday']

# Build the patsy formula string, e.g. 'volume~Friday+Monday+...'.
predictors = '+'.join(x_cols)
formula = outcome + "~" + predictors

# Bug fix: `model` was pickled but never created — fit the OLS model on
# the processed dataframe before saving it.
model = ols(formula=formula, data=msft_model_df).fit()

# Save the fitted model; a context manager ensures the file is closed
# even if pickling raises.
filename = '../data/04_models/finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
# Data collection code kept in a .py module in the src/d00_utils folder so
# it can be imported into different Jupyter notebooks for easy reuse.
def alphavantage_api_csv_download_raw(function, symbol, alpha_vantage_key):
    """Download a time series for *symbol* from the Alpha Vantage API.

    Parameters
    ----------
    function : str
        Alpha Vantage API function, e.g. 'TIME_SERIES_DAILY_ADJUSTED'.
    symbol : str
        Ticker symbol to download, e.g. 'MSFT'.
    alpha_vantage_key : str
        Alpha Vantage API key used to authenticate the request.

    Returns
    -------
    pandas.DataFrame
        The CSV payload returned by the API, parsed into a DataFrame.
    """
    # Bug fix: the original interpolated the undefined global name
    # ALPHA_VANTAGE_KEY instead of the alpha_vantage_key parameter, which
    # raises NameError at call time.  The redundant `function = function`
    # and `symbol = symbol` self-assignments were removed.
    datatype = 'csv'
    url = (
        f"https://www.alphavantage.co/query?function={function}"
        f"&symbol={symbol}&datatype={datatype}&apikey={alpha_vantage_key}"
    )
    return pd.read_csv(url)
# put data processing code into function
def process_alphavantage_data_create_dow_dummies(raw_data_file):
raw_data_file['timestamp'] = pd.to_datetime(raw_data_file['timestamp'])
raw_data_file['day_of_week'] = raw_data_file['timestamp'].dt.day_name()
dummies = pd.get_dummies(raw_data_file['day_of_week'])
raw_data_file.drop(columns=['day_of_week'], inplace=True)
raw_data_file = pd.concat([raw_data_file, dummies], axis=1)
# we are only interested in running a regression of volume against the dummy
# variables for days of the week. Because of this we will drop the remaining
# variables before importing it to our processed data folder