saranshrajput / data_preprocessing.py
Last active August 29, 2020 10:28
Data Preprocessing
import pandas as pd
import scipy.sparse as sparse
import numpy as np
import random
import implicit
from sklearn.preprocessing import MinMaxScaler
articles_df = pd.read_csv('shared_articles.csv')
interactions_df = pd.read_csv('users_interactions.csv')
articles_df.drop(['authorUserAgent', 'authorRegion', 'authorCountry'], axis=1, inplace=True)
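# The snippet jumps straight to grouped_df without showing how it is built. A plausible
# reconstruction (assumed, not from the original): weight each interaction type, join in the
# article titles, and sum the weights per person/article pair. The weight values are illustrative.
event_type_strength = {'VIEW': 1.0, 'LIKE': 2.0, 'BOOKMARK': 2.5, 'FOLLOW': 3.0, 'COMMENT CREATED': 4.0}
interactions_df['eventStrength'] = interactions_df['eventType'].apply(lambda x: event_type_strength[x])
merged_df = pd.merge(interactions_df[['personId', 'contentId', 'eventStrength']],
                     articles_df[['contentId', 'title']], how='inner', on='contentId')
grouped_df = merged_df.groupby(['personId', 'contentId', 'title'], as_index=False)['eventStrength'].sum()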
# Cast to category dtype so person and content IDs can be mapped to dense integer codes
grouped_df['title'] = grouped_df['title'].astype("category")
grouped_df['personId'] = grouped_df['personId'].astype("category")
grouped_df['contentId'] = grouped_df['contentId'].astype("category")
grouped_df['person_id'] = grouped_df['personId'].cat.codes
grouped_df['content_id'] = grouped_df['contentId'].cat.codes
# Build the item-user and user-item interaction matrices, weighted by eventStrength
sparse_content_person = sparse.csr_matrix((grouped_df['eventStrength'].astype(float), (grouped_df['content_id'], grouped_df['person_id'])))
sparse_person_content = sparse.csr_matrix((grouped_df['eventStrength'].astype(float), (grouped_df['person_id'], grouped_df['content_id'])))
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)
# Fit call restored (the snippet omits it). alpha is an illustrative confidence scaling;
# implicit < 0.5 expects the item-user matrix here, newer releases expect the user-item matrix.
alpha = 15
model.fit((sparse_content_person * alpha).astype('double'))
# Find the articles most similar to one article (content_id 450 is just an example index)
content_id = 450
n_similar = 10
person_vecs = model.user_factors
content_vecs = model.item_factors
# Cosine-style similarity: dot product with the query item's vector, divided by each item's norm
content_norms = np.sqrt((content_vecs * content_vecs).sum(axis=1))
scores = content_vecs.dot(content_vecs[content_id]) / content_norms
top_idx = np.argpartition(scores, -n_similar)[-n_similar:]
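# Hedged usage sketch (not in the original snippet): sort the top indices by score and look up
# each article's title, assuming grouped_df still carries the content_id and title columns built above.
for idx, score in sorted(zip(top_idx, scores[top_idx]), key=lambda pair: -pair[1]):
    title = grouped_df.loc[grouped_df['content_id'] == idx, 'title'].iloc[0]
    print(title, score)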
def recommend(person_id, sparse_person_content, person_vecs, content_vecs, num_contents=10):
    # Get the interaction scores from the sparse person-content matrix
    person_interactions = sparse_person_content[person_id, :].toarray().reshape(-1)
    # Add 1 to everything, so that articles with no interaction yet become equal to 1
    person_interactions = person_interactions + 1
    # Make articles already interacted with zero
    person_interactions[person_interactions > 1] = 0
    # Dot product of the person vector with all content vectors
    # (person_vecs and content_vecs are the dense factor matrices here, so no .toarray() call is needed)
    rec_vector = person_vecs[person_id, :].dot(content_vecs.T)
    # Completion of the truncated snippet, following the masking logic above:
    # scale scores to [0, 1], zero out already-seen articles, and return the top-N content indices
    rec_vector_scaled = MinMaxScaler().fit_transform(rec_vector.reshape(-1, 1))[:, 0]
    recommend_vector = person_interactions * rec_vector_scaled
    content_idx = np.argsort(recommend_vector)[::-1][:num_contents]
    return content_idx
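# Hedged usage sketch (not in the original snippet): top-10 article indices for one user;
# person_id 50 is an illustrative index.
recommendations = recommend(50, sparse_person_content, person_vecs, content_vecs, num_contents=10)
print(recommendations)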
LSTM Stock Price Prediction
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
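# Load the price history (assumed step: the snippet never defines df; the ticker and date range are illustrative)
df = web.DataReader('AAPL', data_source='yahoo', start='2012-01-01', end='2019-12-17')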
plt.figure(figsize=(16,8))
plt.title('Close Price History')
plt.plot(df['Close'])
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price USD ($)',fontsize=18)
plt.show()
#Create a new dataframe with only the 'Close' column
data = df.filter(['Close'])
#Convert the dataframe to a numpy array
dataset = data.values
#Compute the number of rows to train the model on (80% of the data)
training_data_len = math.ceil(len(dataset) * .8)
#Scale all of the data to values between 0 and 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)
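# Build the training windows (assumed reconstruction, mirroring the test-set construction below):
# use the previous 60 scaled closes to predict the next one.
train_data = scaled_data[0:training_data_len, :]
x_train, y_train = [], []
for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i, 0])
    y_train.append(train_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
# Reshape into the 3-D [samples, time steps, features] shape the LSTM expects
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))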
model = Sequential()
model.add(LSTM(units=50, return_sequences=True,input_shape=(x_train.shape[1],1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25))
model.add(Dense(units=1))
#Compiling the model
model.compile(optimizer='adam', loss='mean_squared_error')
#Training the model (fit call restored; one epoch with batch size 1 is illustrative)
model.fit(x_train, y_train, batch_size=1, epochs=1)
#Test data set: include the last 60 training values so the first test window is complete
test_data = scaled_data[training_data_len - 60:, :]
#Create the x_test and y_test data sets
x_test = []
#y_test holds the actual (unscaled) closing prices for the test period (the last ~20% of rows)
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
    x_test.append(test_data[i-60:i, 0])
x_test = np.array(x_test)
#Reshape into the 3-D [samples, time steps, features] shape the LSTM expects
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
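# Hedged next step (not in the original snippet): predict on the test windows,
# map the scaled outputs back to dollar prices, and score with RMSE.
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print(rmse)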