This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_dataset(dataset, look_back=1):
    """Build sliding-window samples from the first column of ``dataset``.

    Sample ``i`` of the first returned array is the window
    ``dataset[i:i + look_back, 0]``; entry ``i`` of the second array is the
    value that immediately follows it, ``dataset[i + look_back, 0]``.

    Args:
        dataset: 2-D array that supports ``dataset[rows, 0]`` indexing.
        look_back: Number of consecutive values in each input window.

    Returns:
        A ``(dataX, dataY)`` tuple of NumPy arrays, each holding
        ``len(dataset) - look_back - 1`` samples. Both arrays are empty when
        that count is not positive.
    """
    dataX, dataY = [], []
    # NOTE: the trailing "- 1" leaves the last available window/target pair
    # unused. It is kept so the number of samples callers receive is unchanged.
    for i in range(len(dataset) - look_back - 1):
        dataX.append(dataset[i:i + look_back, 0])
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!rm -rf ./logs/ | |
keras.backend.clear_session() | |
%load_ext tensorboard | |
model = Sequential() | |
# Adding the input layer | |
model.add(LSTM(units=48, activation='tanh', kernel_initializer=tf.keras.initializers.glorot_uniform(seed=26), input_shape = (X_train.shape[1], 1))) | |
# Adding the output layer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import mean_squared_error
from math import sqrt

# Wrap the ARIMA predictions in a DataFrame and give them the test set's index
# so they are drawn against the same x-axis values as the test data.
predictions = pd.DataFrame(predictions_ARIMA_diff_cumsum.values)
predictions.set_index(test_data.index, inplace=True)

# Plot the train data, the test data and the predictions on one figure.
plt.figure(figsize=(12, 7))
plt.plot(train_data['price'], label='Train Data')
plt.plot(test_data['price'], label='Test Data')
plt.plot(predictions, label='Predicted Values')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adding some new words to the VADER dictionary to judge stock market news better.
# NOTE(review): VADER's built-in lexicon scores lie roughly in [-4, 4], so the
# +/-9 values here will dominate any sentence they appear in -- confirm that
# this weighting is intended.
# NOTE(review): VADER appears to look tokens up one word at a time, so the
# multi-word keys ('rupee weakens', 'record low', 'sensex up', ...) may never
# match -- verify against the analyzer's tokenization.
new_words = {
    'falls': -9, 'drops': -9, 'rise': 9, 'increases': 9, 'gain': 9, 'hiked': -9, 'dips': -9, 'declines': -9,
    'decline': -9, 'hikes': -9, 'jumps': 9, 'lose': -9, 'profit': 9, 'loss': -9, 'shreds': -9, 'sell': -9, 'buy': 9, 'recession': -9,
    'rupee weakens': -9, 'record low': -9, 'record high': 9, 'sensex up': 9, 'nifty down': -9, 'sensex down': -9, 'nifty up': 9,
}
analyser = SentimentIntensityAnalyzer()
# Merge the custom scores into the analyzer's lexicon; existing entries with
# the same key are overwritten.
analyser.lexicon.update(new_words)
for i in tqdm(tweet_news.itertuples()): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Cleaning the tweets | |
def decontracted(phrase): | |
# specific | |
phrase = re.sub(r"won't", "will not", phrase) | |
phrase = re.sub(r"can\'t", "can not", phrase) | |
# general | |
phrase = re.sub(r"n\'t", " not", phrase) | |
phrase = re.sub(r"\'re", " are", phrase) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Converting date to proper format
# Maps three-letter English month abbreviations to zero-padded month numbers.
month_dict = {
    'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04',
    'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08',
    'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12',
}
for i in tqdm(nifty.itertuples()): | |
date_list = i[1].split() | |
month = month_dict[date_list[0]] | |
year = date_list[2] | |
date = date_list[1][:-1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Cleaning the tweets | |
def decontracted(phrase): | |
# specific | |
phrase = re.sub(r"won't", "will not", phrase) | |
phrase = re.sub(r"can\'t", "can not", phrase) | |
# general | |
phrase = re.sub(r"n\'t", " not", phrase) | |
phrase = re.sub(r"\'re", " are", phrase) | |
phrase = re.sub(r"\'s", " is", phrase) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# configuration
config = twint.Config()
config.Username = "NDTVProfit"
config.Lang = "en"
# Date bounds use ASCII hyphens (YYYY-MM-DD), the same form as Until.
# En dashes (U+2013) look alike but are not valid separators for date parsing.
config.Since = "2015-01-01"
config.Until = "2020-01-01"
# Store the scraped tweets as CSV in the file named below.
config.Store_csv = True
config.Output = "NDTVProfit.csv"
#running search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data is scraped from this url: https://in.investing.com/indices/s-p-cnx-nifty-historical-data?end_date=1577817000&st_date=946665000
url = 'https://in.investing.com/indices/s-p-cnx-nifty-historical-data?end_date=1577817000&st_date=946665000'
# Send a browser-like User-Agent (presumably the site rejects urllib's default
# one -- confirm).
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
# Read the body inside a context manager so the HTTP response is always closed.
with urlopen(req) as response:
    webpage = response.read()
page_soup = soup(webpage, "html.parser")
# Collect the date cells from the scrolling history table. The trailing space
# in "col-rowDate " is part of the class value being matched and must be kept.
date_raw = (
    page_soup
    .find("div", {'class': 'common-table-scroller js-table-scroller'})
    .find_all("td", {"class": "col-rowDate "})
)
# The date is the second newline-separated piece of each cell's text.
date = [x.text.split("\n")[1] for x in date_raw]
price_raw = []
for i in range(4972): |
NewerOlder