Created
January 22, 2017 09:41
-
-
Save wermarter/b9c36b5421ea1ba1cec7ff8d99c27e63 to your computer and use it in GitHub Desktop.
Pickle vs. Joblib, some ML with update features, DF, predict GOOGL from Quandl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import quandl, pickle, math, datetime | |
import numpy as np | |
import pandas as pd | |
from sklearn.linear_model import LinearRegression | |
from sklearn import preprocessing, cross_validation | |
from sklearn.externals import joblib | |
import matplotlib.pyplot as plt | |
from matplotlib import style | |
# Time spans expressed in seconds.
one_day = 24 * 60 * 60
one_week = 7 * one_day
# READ DATA
# Reuse the frame cached in df.pkl unless it is missing, unreadable, or its
# newest row is more than one week old; otherwise download and rebuild it.
df = None
try:
    # The context manager closes the cache file even if unpickling fails.
    with open('df.pkl', 'rb') as cache_file:
        df = pickle.load(cache_file)
    cache_age = datetime.datetime.now().timestamp() - df.iloc[-1].name.timestamp()
    if cache_age > one_week:
        df = None  # stale cache: fall through to a fresh download
except Exception:
    # Best-effort cache: any failure to read or interpret it (missing file,
    # corrupt pickle, empty frame, ...) simply triggers a fresh download.
    # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.
    df = None

if df is None:
    print('Getting new data from Quandl...')
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from the environment or a config file instead.
    df = quandl.get('WIKI/GOOGL', api_key='GxHxjaX1EiVzszDc1CQt')
    # Daily high-low spread and open-to-close move, both in percent.
    df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Close'] * 100.0
    df['PCT_Change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
    # Keep only the feature columns and replace missing values with a
    # sentinel; building a new frame avoids an in-place edit on a slice.
    df = df[['Adj. Close', 'HL_PCT', 'PCT_Change', 'Adj. Volume']].fillna(value=-99999)
    with open('df.pkl', 'wb') as cache_file:
        pickle.dump(df, cache_file)
# BACKUP DF
# Keep an independent copy of the feature frame before the label column is
# added and rows are dropped. DataFrame.append() was removed in pandas 2.0,
# so copy() is used instead of appending to an empty frame.
df_orig = df.copy()
# SETTING
# Column to forecast, and how many rows ahead to forecast (1% of the data,
# rounded up).
forecast_col = 'Adj. Close'
forecast_out = math.ceil(len(df) * 0.01)

# Scale every column of the frame, then split off the final `forecast_out`
# rows: they have no future label and are the inputs to predict from.
X = preprocessing.scale(np.array(df))
X_predict, X = X[-forecast_out:], X[:-forecast_out]

# The label for each row is the forecast column `forecast_out` rows later;
# the trailing rows whose label would be NaN are dropped.
df['label'] = df[forecast_col].shift(-forecast_out)
df.dropna(inplace=True)
Y = np.array(df['label'])
# DO_IT
# Split the data, reuse the model cached in clf.pkl when it still scores at
# least 0.9 on the test split, otherwise fit and cache a new one, then
# predict the forecast rows.
# NOTE(review): sklearn.cross_validation and sklearn.externals.joblib were
# removed in later scikit-learn releases; on a current install the imports
# need to move to sklearn.model_selection and the standalone joblib package.
X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2)

clf = None
confidence = 0
try:
    clf = joblib.load('clf.pkl')
    confidence = clf.score(X_test, Y_test)
except Exception:
    # Best-effort cache: a missing or unusable model file just means a refit.
    clf = None

if clf is None or confidence < 0.9:
    print('Re-fitting the line...')
    clf = LinearRegression()
    clf.fit(X_train, Y_train)
    joblib.dump(clf, 'clf.pkl')
    # Always score the new model; previously a rejected cached model's
    # (nonzero) score was kept and misreported as the current confidence.
    confidence = clf.score(X_test, Y_test)

forecast_set = clf.predict(X_predict)
# Prepare for VISUAL
# Work on the backed-up frame and add a Forecast column that is NaN for all
# historical rows.
df = df_orig
df['Forecast'] = np.nan

# Seed the last historical row with its own closing price, written directly
# by position rather than through a copied row.
df.iloc[-1, df.columns.get_loc('Forecast')] = df['Adj. Close'].iloc[-1]

# Append one row per predicted value, one calendar day apart. Date arithmetic
# is done on the index value itself: the earlier round trip through
# Timestamp.timestamp() (naive value treated as UTC) and
# datetime.fromtimestamp() (local time) shifted the dates by the machine's
# UTC offset.
next_date = df.index[-1] + datetime.timedelta(days=1)
for i in forecast_set:
    # Every column except the trailing Forecast column stays NaN.
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [i]
    next_date += datetime.timedelta(days=1)
# PLOT_IT
# Draw the historical close and the forecast on one chart.
style.use('ggplot')
for column in ('Adj. Close', 'Forecast'):
    df[column].plot()
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend(loc=4)  # location code 4 places the legend in the lower-right corner
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment