Skip to content

Instantly share code, notes, and snippets.

@shamdasani
Created July 14, 2017 22:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shamdasani/54dfa47845873444139fc147027399d3 to your computer and use it in GitHub Desktop.
Save shamdasani/54dfa47845873444139fc147027399d3 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import quandl, math, time, datetime
import matplotlib.pyplot as plt
import pickle
from matplotlib import style
from sklearn import preprocessing, model_selection, svm
from sklearn.linear_model import LinearRegression
# Quandl API key. It is empty here -- presumably a real key is needed for
# authenticated access; confirm before running.
quandl.ApiConfig.api_key = ''

# Fetch the WIKI/GOOGL dataset and keep only the adjusted closing price.
# .copy() makes the one-column frame independent of the downloaded frame, so
# adding the 'Forecast' column below does not raise SettingWithCopyWarning.
df = quandl.get('WIKI/GOOGL')
df = df[['Adj. Close']].copy()

# Replace missing prices with a sentinel value instead of dropping the rows.
df = df.fillna(-99999)

# Forecast horizon: 1% of the number of rows, rounded up.
forecast = int(math.ceil(0.01 * len(df)))

# Label for each row = the adjusted close `forecast` rows later. The final
# `forecast` rows have nothing to shift in, so their label is NaN.
df['Forecast'] = df['Adj. Close'].shift(-forecast)
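'''
With this shift, each row's 'Forecast' value is the 'Adj. Close' from
`forecast` rows later (1% of the stock's lifetime, e.g. 33 days ahead),
i.e. the price the model should learn to predict for that row.
The shift is built so the model learns from the stock's trend.
'''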
# X - features (every column except the label) | y - labels ('Forecast').
# `columns=` is used instead of a positional axis argument: pandas 2.0 made
# every argument of DataFrame.drop after `labels` keyword-only.
X = np.array(df.drop(columns=['Forecast']))

# Scale the whole feature matrix in one go, before it is split, so the rows
# kept aside for prediction share the same scaling as the training rows.
X = preprocessing.scale(X)

# The last `forecast` rows have no label (the shift left NaN there): keep
# their features aside for the final prediction and train on the rest.
X_lately = X[-forecast:]
X = X[:-forecast]

# Remove those unlabeled rows from the frame so that y lines up with X.
df.dropna(inplace=True)
y = np.array(df['Forecast'])
# Hold out 20% of the labelled rows for evaluation. No random_state is set,
# so the split (and therefore the score) differs from run to run.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
)

# Fit an ordinary linear regression on the training part; fit() returns the
# estimator itself, so construction and training can be chained.
clf = LinearRegression(n_jobs=-1).fit(X_train, y_train)
# Store the fitted model on disk, then load it back so the following steps
# run against the persisted copy.
with open('prediction.pickle', 'wb') as f:
    pickle.dump(clf, f)

# Read inside a context manager so the file handle is closed right after
# loading (the handle was previously left open for the rest of the script).
# Only unpickle files this script wrote itself: pickle can execute arbitrary
# code when given untrusted data.
with open('prediction.pickle', 'rb') as pickle_in:
    clf = pickle.load(pickle_in)
# Predict a price for each of the held-back, unlabeled feature rows.
forecast_set = clf.predict(X_lately)

# Score the model on the test split. For a scikit-learn regressor, score()
# is the R^2 coefficient of determination, not a classification accuracy.
accuracy = clf.score(X_test, y_test)

# Show the predictions, the test score and the horizon (in rows).
print(forecast_set, accuracy, forecast)
# Append one row per predicted value, dated on consecutive calendar days
# after the last row still present in df.
#
# NOTE(review): df was trimmed by dropna() earlier, so `last_date` is the
# last *labelled* row, not the latest downloaded quote. The predictions were
# made from the `forecast` rows that were dropped, so they appear to belong
# `forecast` rows further to the right than where they are attached here.
# Fixing that needs the untrimmed index, which is no longer available at
# this point -- confirm and address where dropna() is called.
# NOTE(review): calendar days are used, so weekends/holidays get rows too.

# Clear the training labels; from here on the column holds only predictions.
df['Forecast'] = np.nan
last_date = df.iloc[-1].name

# Step with timedelta on the index timestamp rather than adding 86400 seconds
# to a local-time Unix timestamp: the latter drifts by an hour (and can skip
# or repeat a date) whenever the local timezone crosses a DST change.
for offset, predicted in enumerate(forecast_set, start=1):
    next_date = last_date + datetime.timedelta(days=offset)
    # Every column is NaN except the last one ('Forecast'), which gets the
    # predicted value.
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [predicted]
# Draw the historical prices and the appended predictions on one chart.
style.use('ggplot')
for column in ('Adj. Close', 'Forecast'):
    # Each Series.plot() call draws onto the current axes, so both lines
    # end up in the same figure.
    df[column].plot()

plt.legend(loc=4)
plt.xlabel("Date")
plt.ylabel("Price")
plt.show()

# Print the last 40 rows of the frame.
print(df.tail(40))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment