## This script downloads historical stock prices and VIX data for a given period.
import quandl
import pandas as pd
import csv
# Get a list of S&P 500 tickers
Tickers = pd.read_csv(r"C:\Users\lenovo\Desktop\Karishma\Stocks\SP500_Symbols.csv")
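# The actual download loop is not shown in this snippet. Below is a minimal sketch of how it
# could look; the "WIKI/<ticker>" and "CBOE/VIX" Quandl dataset codes, the 'Symbol' column
# name, the date range, and the output file names are illustrative assumptions, not taken
# from the original script.
quandl.ApiConfig.api_key = "xxxxxxxxxxxxxxxx"
start, end = "2010-01-01", "2017-01-01"
for ticker in Tickers["Symbol"]:
    try:
        prices = quandl.get("WIKI/" + ticker, start_date=start, end_date=end)
        prices.to_csv(ticker + ".csv")
    except Exception as e:
        print("Skipping", ticker, ":", e)
vix = quandl.get("CBOE/VIX", start_date=start, end_date=end)
vix.to_csv("VIX.csv")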
## This script downloads historical stock prices for a given period.
import quandl
import pandas as pd
import csv
# Get a list of S&P 500 tickers
Tickers = pd.read_csv(r"C:\Users\lenovo\Desktop\Karishma\Stocks\SP500_Symbols.csv")
## This script updates the stock price data with the latest date's prices.
import quandl
import pandas as pd
import csv
import datetime as DT
# Get a list of S&P 500 tickers
Tickers = pd.read_csv(r"C:\Users\lenovo\Desktop\Karishma\Stocks\SP500_Symbols.csv")
quandl.ApiConfig.api_key = "xxxxxxxxxxxxxxxx"
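# The update logic itself is not shown in this snippet. A minimal sketch follows, assuming each
# ticker's history was saved as "<ticker>.csv" by the download script and that the "WIKI/<ticker>"
# Quandl codes and a 'Symbol' column are used; these details are assumptions for illustration.
today = DT.date.today()
start = today - DT.timedelta(days=5)  # re-fetch the last few days and drop any overlap
for ticker in Tickers["Symbol"]:
    existing = pd.read_csv(ticker + ".csv", index_col=0, parse_dates=True)
    latest = quandl.get("WIKI/" + ticker, start_date=str(start), end_date=str(today))
    updated = pd.concat([existing, latest])
    updated = updated[~updated.index.duplicated(keep="last")]
    updated.to_csv(ticker + ".csv")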
## This script creates new normalized input variables and ML models to be used for prediction
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
# Calculate moving averages, normalize the variables below, and add them to the dataframe 'df'
VolumeMA = df['Volume'].rolling(window=50).mean()
AdjClosedMA_50 = df['Adj. Close'].rolling(window=50).mean()
AdjClosedMA_200 = df['Adj. Close'].rolling(window=200).mean()
df['VolumeN']= ((df['Volume']-VolumeMA)/VolumeMA)*100
df['AdjClosedN_50'] = ((df['Adj. Close']- AdjClosedMA_50)/AdjClosedMA_50)*100
df['AdjClosedN_200'] = ((df['Adj. Close']- AdjClosedMA_200)/AdjClosedMA_200)*100
# Convert numDays (set earlier in the script, along with minProfit) to a float for later calculations
numDaysF = float(numDays)
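# The feature columns HighN, LowN, OpenN and pctChange and the target column Class are used
# below but are not constructed in the lines shown here. The definitions below are assumptions
# chosen to mirror the normalization pattern above (percent deviation from a 50-day moving
# average) and the numDays/minProfit naming (label = 1 if the adjusted close is at least
# minProfit percent higher numDays days later); the original script may define them differently.
HighMA_50 = df['Adj. High'].rolling(window=50).mean()
LowMA_50 = df['Adj. Low'].rolling(window=50).mean()
OpenMA_50 = df['Adj. Open'].rolling(window=50).mean()
df['HighN'] = ((df['Adj. High'] - HighMA_50) / HighMA_50) * 100
df['LowN'] = ((df['Adj. Low'] - LowMA_50) / LowMA_50) * 100
df['OpenN'] = ((df['Adj. Open'] - OpenMA_50) / OpenMA_50) * 100
df['pctChange'] = df['Adj. Close'].pct_change() * 100
futureClose = df['Adj. Close'].shift(-int(numDaysF))
df['Class'] = (futureClose >= df['Adj. Close'] * (1 + float(minProfit) / 100)).astype(int)
df = df.dropna()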
## Creating SVM model
X = df[['HighN','LowN','OpenN','VolumeN', 'pctChange','AdjClosedN_50', 'AdjClosedN_200']]
y = df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state = 42)
# Balance the classes in the training set by random oversampling
ros = RandomOverSampler()
X_train_oversampled, y_train_oversampled = ros.fit_resample(X_train, y_train)
# Train an SVM classifier and evaluate its predictions on the held-out test set
clf = svm.SVC()
clf.fit(X_train_oversampled, y_train_oversampled)
predict = clf.predict(X_test)
cm = confusion_matrix(y_test, predict)
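# accuracy_score and precision_score are imported above but not used in the lines shown; a
# minimal reporting sketch follows. Printing these three results together is an illustrative
# choice, not necessarily how the original script reports them.
print("Confusion matrix:\n", cm)
print("Accuracy:", accuracy_score(y_test, predict))
print("Precision:", precision_score(y_test, predict))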
## Twitter_1.py (Twitter Analysis, karishmadudani, September 4, 2017): This script collects and analyzes tweets using the Twitter API.
import tweepy
import json
import pandas as pd
from imageio import imread  # scipy.misc.imread was removed in newer SciPy releases; imageio provides a replacement
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib as mpl
import csv
import matplotlib.pyplot as plt
import operator
#Authentication
consumer_key = 'xxxxxxxxxxxxxxxxxxxxxxx'
consumer_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
access_token = 'xxxxxxxxxxxxx-xxxxxxxxxxxxxxx'
access_token_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)  # authenticate with Twitter's API
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)  # create the API object
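# How the tweets are collected is not shown before tweets_df() is defined below. A minimal
# sketch follows, assuming tweepy 3.x (current when this gist was written), where api.search
# and tweepy.Cursor are available; the query string and item count are illustrative assumptions.
results = []
for tweet in tweepy.Cursor(api.search, q="#stocks", lang="en").items(500):
    results.append(tweet)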
# Store tweet data in a dataframe
def tweets_df(results):
    id_list = [tweet.id for tweet in results]
    data_set = pd.DataFrame(id_list, columns=["id"])
    data_set["text"] = [tweet.text for tweet in results]
    data_set["created_at"] = [tweet.created_at for tweet in results]
    data_set["retweet_count"] = [tweet.retweet_count for tweet in results]
    data_set["user_screen_name"] = [tweet.author.screen_name for tweet in results]