Skip to content

Instantly share code, notes, and snippets.

View Shagun-25's full-sized avatar
🎯
Focusing

Shagun Kala Shagun-25

🎯
Focusing
View GitHub Profile
def create_dataset(dataset, look_back=1):
    """Slice a 2-D series array into supervised-learning (X, y) pairs.

    Parameters
    ----------
    dataset : np.ndarray
        2-D array whose column 0 holds the series values
        (shape (n, 1) in the surrounding pipeline — TODO confirm).
    look_back : int
        Number of past steps that form one input window.

    Returns
    -------
    tuple(np.ndarray, np.ndarray)
        X of shape (samples, look_back) and y of the next-step targets.
    """
    data_x, data_y = [], []
    # Fix: the original loop bound was len(dataset) - look_back - 1, an
    # off-by-one that silently dropped the final window/target pair.
    for i in range(len(dataset) - look_back):
        data_x.append(dataset[i:(i + look_back), 0])
        data_y.append(dataset[i + look_back, 0])
    return np.array(data_x), np.array(data_y)
# NOTE(review): this is a Jupyter-notebook cell, not plain Python — the "!"
# shell escape and "%" line magic below only run inside IPython.
!rm -rf ./logs/
keras.backend.clear_session()
%load_ext tensorboard
model = Sequential()
# Adding the input layer: one LSTM layer; the seeded glorot initializer makes
# the weight init reproducible. Assumes X_train has shape
# (samples, timesteps, 1) — defined elsewhere, TODO confirm.
model.add(LSTM(units=48, activation='tanh', kernel_initializer=tf.keras.initializers.glorot_uniform(seed=26), input_shape = (X_train.shape[1], 1)))
# Adding the output layer — NOTE(review): the Dense output layer itself is
# missing from this snippet.
from sklearn.metrics import mean_squared_error
from math import sqrt
# NOTE(review): mean_squared_error / sqrt are unused in this span —
# presumably an RMSE computation follows in the original notebook.
# Re-index the cumulative-sum ARIMA forecasts onto the test set's date index
# so they share an axis with the observed prices below.
predictions = pd.DataFrame(predictions_ARIMA_diff_cumsum.values)
predictions.set_index(test_data.index, inplace = True)
# Overlay train / test / forecast for a visual goodness-of-fit check.
plt.figure(figsize=(12,7))
plt.plot(train_data['price'], label = 'Train Data')
plt.plot(test_data['price'], label = 'Test Data')
plt.plot(predictions, label = 'Predicted Values')
#Adding some new words to Vader Dictionary to judge stock market news better.
# Hand-tuned valence scores (+9 bullish, -9 bearish) for finance terms VADER's
# stock lexicon scores poorly. NOTE(review): 'hiked'/'hikes' are scored -9 —
# confirm that negative polarity for rate hikes is intended.
new_words = {'falls': -9, 'drops': -9, 'rise': 9, 'increases': 9, 'gain': 9, 'hiked': -9, 'dips': -9, 'declines': -9,
'decline': -9, 'hikes': -9, 'jumps': 9, 'lose': -9, 'profit': 9, 'loss': -9, 'shreds': -9, 'sell': -9, 'buy': 9, 'recession': -9,
'rupee weakens': -9, 'record low': -9, 'record high': 9, 'sensex up': 9, 'nifty down': -9, 'sensex down': -9, 'nifty up': 9}
analyser = SentimentIntensityAnalyzer()
# Merge the custom scores into VADER's lexicon (overwrites existing entries).
analyser.lexicon.update(new_words)
# NOTE(review): the loop body is missing from this snippet — presumably it
# scores each tweet/news row with the updated analyser.
for i in tqdm(tweet_news.itertuples()):
#Cleaning the tweets
# Expand common English contractions so VADER scores the full words.
# NOTE(review): indentation and the trailing `return phrase` are missing from
# this scrape — the original function must return the rewritten phrase.
def decontracted(phrase):
# specific
phrase = re.sub(r"won't", "will not", phrase)
phrase = re.sub(r"can\'t", "can not", phrase)
# general
phrase = re.sub(r"n\'t", " not", phrase)
phrase = re.sub(r"\'re", " are", phrase)
#Converting date to proper format
# Map three-letter month abbreviations to zero-padded month numbers.
month_dict = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08',
'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
# Parse the raw date column (i[1]); rows look like "Jan 01, 2020" —
# presumably, judging by the [:-1] comma strip below. TODO confirm.
for i in tqdm(nifty.itertuples()):
date_list = i[1].split()
month = month_dict[date_list[0]]  # "Jan" -> "01"
year = date_list[2]
date = date_list[1][:-1]  # strip the trailing comma from the day token
#Cleaning the tweets
# Second copy of the contraction expander (duplicated across gists).
# NOTE(review): indentation and the trailing `return phrase` are missing from
# this scrape — the original function must return the rewritten phrase.
def decontracted(phrase):
# specific
phrase = re.sub(r"won't", "will not", phrase)
phrase = re.sub(r"can\'t", "can not", phrase)
# general
phrase = re.sub(r"n\'t", " not", phrase)
phrase = re.sub(r"\'re", " are", phrase)
phrase = re.sub(r"\'s", " is", phrase)
#configuration
# Twint search settings: scrape @NDTVProfit's English-language tweets for
# 2015-01-01 .. 2020-01-01 and store them in NDTVProfit.csv.
config = twint.Config()
config.Username = "NDTVProfit"
config.Lang = "en"
# Fix: the start date was written with Unicode en-dashes ("2015–01–01"),
# which twint's date parsing rejects; it must use ASCII hyphens like Until.
config.Since = "2015-01-01"
config.Until = "2020-01-01"
config.Store_csv = True
config.Output = "NDTVProfit.csv"
#running search
#Data is scraped from this url: https://in.investing.com/indices/s-p-cnx-nifty-historical-data?end_date=1577817000&st_date=946665000
url = 'https://in.investing.com/indices/s-p-cnx-nifty-historical-data?end_date=1577817000&st_date=946665000'
# Spoofed browser User-Agent — presumably the site rejects urllib's default
# agent; verify if the request starts failing.
req = Request(url , headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read()
page_soup = soup(webpage, "html.parser")
# Collect every date cell of the historical-data table; the trailing space in
# "col-rowDate " matches the class attribute exactly as rendered on the page.
date_raw = page_soup.find("div", {'class':'common-table-scroller js-table-scroller'}).find_all("td", {"class":"col-rowDate "})
date = [x.text.split("\n")[1] for x in date_raw]
price_raw = []
# NOTE(review): 4972 is a hard-coded row count for this date range — it will
# break if the table size changes; loop body is missing from this snippet.
for i in range(4972):