Skip to content

Instantly share code, notes, and snippets.

@karishmadudani
Created August 7, 2017 18:23
Show Gist options
  • Save karishmadudani/d507072b8f5e435f58579d4e212ab3b9 to your computer and use it in GitHub Desktop.
Save karishmadudani/d507072b8f5e435f58579d4e212ab3b9 to your computer and use it in GitHub Desktop.
## This script creates new normalized input variables and ML models to be used for prediction
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.externals import joblib
import datetime as DT
# Import the tickers (symbols) file and the VIX data file.
# The paths are raw strings on purpose: in an ordinary Python 3 string literal
# the "\U" of "C:\Users" starts a unicode escape and the file does not compile.
Tickers = pd.read_csv(r"C:\Users\lenovo\Desktop\Karishma\Stocks\SP500_Symbols.csv")
d0_vix = pd.read_csv(r"C:\Users\lenovo\Desktop\Karishma\Stocks\Scripts\RawData\VixData.csv")

# Parameter grid: window length (in rows) and minimum profit (in percent).
numDaysList = [40]
minProfitList = [10]

# Create input variables for the model: for every parameter combination and
# each of the first 50 tickers, label each row with a binary 'Class'.
for j in numDaysList:
    for k in minProfitList:
        for n in range(0, 50):
            symbol = Tickers.iloc[n]["Symbol"]
            # A raw string cannot end with a backslash, so the trailing path
            # separator is concatenated as a separate (escaped) literal.
            df = pd.read_csv(
                r"C:\Users\lenovo\Desktop\Karishma\Stocks\Scripts\RawData"
                + "\\" + symbol + ".csv"
            )
            numDays = j
            minProfit = k

            # This date will be used to create the input variables and models
            Date = "2017-05-05"
            date_index = df[df['Date'] == Date].index.tolist()
            if not date_index:
                # Same exception type the bare date_index[0] lookup would
                # raise, but saying which ticker file is missing the date.
                raise IndexError(
                    "Date {} not found in data for ticker {}".format(Date, symbol)
                )

            # Keep rows up to and including the cut-off date. The explicit
            # copy makes the 'Class' writes below target an independent frame
            # rather than a slice of the one returned by read_csv.
            df = df[:date_index[0] + 1].copy()

            # Row 0 is skipped, and the last numDays rows are left unlabelled
            # because a full window is not available for them.
            for i in range(1, len(df) - numDays):
                # If the highest 'Adj. Close' in the numDays-row window
                # starting at row i (row i included) exceeds row i's price by
                # at least minProfit percent, then Class = 1, else Class = 0.
                df2 = df[i:i + numDays]
                maxPrice = df2['Adj. Close'].max(axis=0)
                # .loc replaces the removed DataFrame.ix / set_value accessors;
                # lookups are label-based on the default integer index.
                Percent = 100 * (maxPrice - df.loc[i, 'Adj. Close']) / df.loc[i, 'Adj. Close']
                if Percent >= minProfit:
                    df.loc[i, 'Class'] = 1
                else:
                    df.loc[i, 'Class'] = 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment