This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import scipy.sparse as sparse | |
| import numpy as np | |
| import random | |
| import implicit | |
| from sklearn.preprocessing import MinMaxScaler | |
# Author-related columns that are removed from the articles table on load.
AUTHOR_COLUMNS_TO_DROP = ['authorUserAgent', 'authorRegion', 'authorCountry']

# Read the shared-articles table and strip the columns listed above.
articles_df = pd.read_csv('shared_articles.csv').drop(AUTHOR_COLUMNS_TO_DROP, axis=1)

# Read the user/article interaction table.
interactions_df = pd.read_csv('users_interactions.csv')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Store these columns as pandas categoricals so that every distinct value
# gets an integer code.
for column in ('title', 'personId', 'contentId'):
    grouped_df[column] = grouped_df[column].astype("category")

# Integer codes of the categoricals; used below as matrix row/column indices.
grouped_df['person_id'] = grouped_df['personId'].cat.codes
grouped_df['content_id'] = grouped_df['contentId'].cat.codes

# Values and index vectors shared by both matrix orientations.
event_strengths = grouped_df['eventStrength'].astype(float)
person_codes = grouped_df['person_id']
content_codes = grouped_df['content_id']

# The same interactions laid out as content x person and as person x content.
sparse_content_person = sparse.csr_matrix(
    (event_strengths, (content_codes, person_codes))
)
sparse_person_content = sparse.csr_matrix(
    (event_strengths, (person_codes, content_codes))
)

# ALS model configured with 20 factors, 0.1 regularization and 50 iterations.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=50,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Row of the item-factor matrix to find neighbours for, and how many
# neighbours to keep.
content_id = 450
n_similar = 10

# Factor matrices exposed by the model.
person_vecs = model.user_factors
content_vecs = model.item_factors

# Euclidean length of every content vector.
content_norms = np.linalg.norm(content_vecs, axis=1)

# Dot product of each content vector with the query vector, divided by that
# content vector's own norm. The query vector's norm is not divided out, so
# every score carries the same constant factor.
query_vec = content_vecs[content_id]
scores = (content_vecs @ query_vec) / content_norms

# Indices of the n_similar largest scores. argpartition only partitions, so
# these indices are not ordered by score.
top_idx = np.argpartition(scores, -n_similar)[-n_similar:]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def recommend(person_id, sparse_person_content, person_vecs, content_vecs, num_contents=10):
    """Compute a score for every content item for one person.

    Args:
        person_id: Row index of the person in ``sparse_person_content`` and
            ``person_vecs``.
        sparse_person_content: Sparse person x content interaction matrix.
        person_vecs: Person factor matrix; a SciPy sparse matrix or a dense
            NumPy array.
        content_vecs: Content factor matrix; sparse or dense.
        num_contents: Number of items to recommend. Not read by the
            statements visible in this excerpt.

    NOTE(review): as shown, the function stops after computing the score
    vector and returns nothing; the ranking/return step is presumably cut
    off from this excerpt and has not been reconstructed here.
    """
    # Interaction strengths of this person with every content item, flattened
    # to one dimension and shifted by one so untouched items equal exactly 1.
    person_interactions = sparse_person_content[person_id, :].toarray()
    person_interactions = person_interactions.reshape(-1) + 1

    # Zero out the items the person has already interacted with.
    person_interactions[person_interactions > 1] = 0

    # Dot product of the person's vector with every content vector.
    rec_vector = person_vecs[person_id, :].dot(content_vecs.T)
    if hasattr(rec_vector, "toarray"):
        # Sparse result: densify, giving a (1, n_contents) array.
        rec_vector = rec_vector.toarray()
    else:
        # Dense factors produce a dense result with no ``toarray`` method;
        # reshape it to the same (1, n_contents) layout instead of failing.
        rec_vector = rec_vector.reshape(1, -1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import math | |
| import pandas_datareader as web | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.preprocessing import MinMaxScaler | |
| from keras.models import Sequential | |
| from keras.layers import Dense, LSTM | |
| import matplotlib.pyplot as plt | |
| plt.style.use('fivethirtyeight') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Line chart of the 'Close' column on a 16x8 figure.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(df['Close'])

# Title and axis captions.
ax.set_title('Close Price History')
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('Close Price USD ($)', fontsize=18)

plt.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Both axis captions use the same font size.
axis_label_style = {'fontsize': 18}

# Draw the 'Close' column on a fresh 16x8 figure, then annotate it.
plt.figure(figsize=(16, 8))
plt.plot(df['Close'])
plt.title('Close Price History')
plt.xlabel('Date', **axis_label_style)
plt.ylabel('Close Price USD ($)', **axis_label_style)
plt.show()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the 'Close' column, as a dataframe and as a NumPy array.
data = df.filter(['Close'])
dataset = data.values

# Number of rows used to train the model: 80% of all rows, rounded up.
training_data_len = math.ceil(0.8 * len(dataset))

# Scale all of the data to values between 0 and 1.
# NOTE(review): the scaler is fitted on every row, including those past
# training_data_len -- confirm that fitting on the later rows is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(dataset).transform(dataset)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Network: two 50-unit LSTM layers followed by dense layers of 25 and 1 units.
# The first LSTM takes input of shape (x_train.shape[1], 1) and returns the
# full sequence; the second returns only its last output.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(units=50, return_sequences=False),
    Dense(units=25),
    Dense(units=1),
])

# Compile the model with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer='adam', loss='mean_squared_error')
| #Training the Model |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scaled rows for the test set, starting 60 rows before the split point so the
# first test sample has 60 preceding values available.
test_data = scaled_data[training_data_len - 60:, :]

# Targets: the unscaled rows from the split point to the end, all columns.
y_test = dataset[training_data_len:, :]

# One sample per target row: the 60 values of column 0 that precede it.
x_test = np.array(
    [test_data[end - 60:end, 0] for end in range(60, len(test_data))]
)

# Append a trailing axis of length 1, giving (samples, steps, 1).
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
OlderNewer