Markisha Berrien-Fitzsimons (mberrien-fitzsimons): GitHub gists
# Travis CI configuration for running the test suite
dist: xenial
language: python
python:
- "3.7.1"
install:
- pip install -r requirements.txt
- pip install pandas
script:
- pytest
# package setup script
from setuptools import setup

# functools is part of the Python standard library, so it is not a pip dependency
install_requires = [
    'pandas>=0.25.0',
    'numpy>=1.15.4']

setup(name='misha_math',
      version='0.0.1',
      description='test',
      install_requires=install_requires)
import pytest
from mypackage_two.pandas_math import create_empty_dataframe


class TestCreateEmptyDataframe(object):
    def test_on_create_empty_dataframe(self):
        actual = len(create_empty_dataframe(['foo', 'bar'], 20))
        expected = 20
        assert actual == expected
import pandas as pd
import numpy as np

def create_empty_dataframe(new_column_list, num_rows):
    """
    Creates a new dataframe filled with zeroes from a specified
    list and number of rows.
    Args:
        new_column_list (list): List of column names.
        num_rows (int): Number of rows in the new dataframe.
    """
    # build a zero-filled array of the requested shape and wrap it in a dataframe
    return pd.DataFrame(np.zeros((num_rows, len(new_column_list))),
                        columns=new_column_list)
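A quick check of the helper above (a sketch; the package path comes from the test file):

from mypackage_two.pandas_math import create_empty_dataframe

df = create_empty_dataframe(['foo', 'bar'], 5)
print(df.shape)  # (5, 2); every cell is 0.0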
# put data processing code into function
import pandas as pd

def process_alphavantage_data_create_dow_dummies(raw_data_file):
    raw_data_file['timestamp'] = pd.to_datetime(raw_data_file['timestamp'])
    raw_data_file['day_of_week'] = raw_data_file['timestamp'].dt.day_name()
    dummies = pd.get_dummies(raw_data_file['day_of_week'])
    raw_data_file.drop(columns=['day_of_week'], inplace=True)
    raw_data_file = pd.concat([raw_data_file, dummies], axis=1)
    # we are only interested in running a regression of volume against the dummy
    # variables for days of the week. Because of this we will drop the remaining
    # variables before exporting the result to our processed data folder
    return raw_data_file[['volume'] + list(dummies.columns)]
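A minimal sketch of how this processing step might be wired up; the raw file path is an assumption, while the processed path is the one read back in the modeling snippet below:

raw_df = pd.read_csv('../data/01_raw/msft_daily_raw.csv')  # hypothetical raw file
msft_proc_df = process_alphavantage_data_create_dow_dummies(raw_df)
msft_proc_df.to_csv('../data/03_processed/msft_proc.csv', index=False)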
# Put data collection code into a .py document in the src/d00_utils folder. From there
# it can be imported into different jupyter notebooks for easy data collection.
import pandas as pd

def alphavantage_api_csv_download_raw(function, symbol, alpha_vantage_key):
    datatype = 'csv'
    url = (f"https://www.alphavantage.co/query?function={function}&symbol={symbol}"
           f"&datatype={datatype}&apikey={alpha_vantage_key}")
    return pd.read_csv(url)
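Example call (a sketch; the Alpha Vantage function name and ticker are assumptions, and ALPHA_VANTAGE_KEY is expected to hold your own API key):

msft_raw = alphavantage_api_csv_download_raw('TIME_SERIES_DAILY', 'MSFT', ALPHA_VANTAGE_KEY)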
# save the model ('model' is the fitted OLS result from the snippet below)
import pickle

filename = '../data/04_models/finalized_model.sav'
with open(filename, 'wb') as f:
    pickle.dump(model, f)
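And to reload it later (a sketch of the matching pickle.load call):

import pickle

with open('../data/04_models/finalized_model.sav', 'rb') as f:
    loaded_model = pickle.load(f)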
# import required library
from statsmodels.formula.api import ols

# define the problem
outcome = 'volume'
x_cols = ['Friday', 'Monday', 'Thursday', 'Tuesday', 'Wednesday']

# fit the actual model (assumes msft_model_df has been read from the
# processed data folder, as in the snippet below)
predictors = '+'.join(x_cols)
formula = outcome + "~" + predictors
model = ols(formula=formula, data=msft_model_df).fit()
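Once fitted, the regression results can be inspected with the standard statsmodels summary:

print(model.summary())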
# read new dataframe in from processed data folder
import pandas as pd

msft_model_df = pd.read_csv('../data/03_processed/msft_proc.csv')
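A quick sanity check that the processed file exposes the columns the model formula expects (volume plus the weekday dummies):

print(msft_model_df.columns.tolist())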