# Build the list of IMDB ids to fetch posters for.
#
# links.csv is read with pandas; itertuples() yields (index, col1, col2, ...),
# so row[1] is the first CSV column and row[2] the second.
# NOTE(review): presumably these are movieId and imdbId from the MovieLens
# links file -- confirm against the data set.
df_id = pd.read_csv('links.csv', sep=',')
idx_to_movie = {}
for row in df_id.itertuples():
    # Shift the first column by one so the keys line up with 0-based indices.
    idx_to_movie[row[1] - 1] = row[2]

# Only indices 0..8999 are considered, and only ids whose decimal form is
# exactly six characters long are kept.
# A list comprehension is used instead of filter(): on Python 3 filter()
# returns a lazy iterator, so the len() call below would raise TypeError.
# Membership is tested on the dict itself rather than on .keys(), which is
# a linear list scan per lookup on Python 2.
total_movies = 9000
movies = [
    idx_to_movie[i]
    for i in range(total_movies)
    if i in idx_to_movie and len(str(idx_to_movie[i])) == 6
]
total_movies = len(movies)
# Ask get_poster (defined elsewhere) for a (url, imdb) pair for every movie.
# URL and IMDB keep one slot per movie; URL_IMDB collects only the pairs
# whose URL differs from base_url.
# NOTE(review): a URL equal to base_url presumably means "no poster found"
# -- confirm against get_poster.
URL = [0] * total_movies
IMDB = [0] * total_movies
URL_IMDB = {"url": [], "imdb": []}
for i, movie in enumerate(movies):
    URL[i], IMDB[i] = get_poster(movie, base_url)
    if URL[i] == base_url + "":
        continue
    URL_IMDB["url"].append(URL[i])
    URL_IMDB["imdb"].append(IMDB[i])
# Leave the counter at the number of processed movies, as a manual counter would.
i = len(URL)

# From here on total_movies counts only the movies that have a usable URL.
df = pd.DataFrame(data=URL_IMDB)
total_movies = len(df)
import urllib
# urlretrieve lives in urllib on Python 2 and in urllib.request on Python 3;
# try the Python 3 location first and fall back to the Python 2 one.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

# Download every poster in df.url to <poster_path><row number>.jpg.
# The row number doubles as the file name, so file i belongs to df row i.
poster_path = "/Users/wannjiun/Desktop/nycdsa/project_5_recommender/posters/"
for i in range(total_movies):
    urlretrieve(df.url[i], poster_path + str(i) + ".jpg")
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image as kimage

# Read each downloaded poster back from disk at 224x224 and turn it into a
# model input: image[i] holds the loaded picture, x[i] the preprocessed array.
image = []
x = []
for i in range(total_movies):
    image.append(
        kimage.load_img(poster_path + str(i) + ".jpg", target_size=(224, 224))
    )
    # Array conversion, then a leading axis of length 1 (a batch of one),
    # then the VGG16-specific preprocessing.
    x.append(
        preprocess_input(np.expand_dims(kimage.img_to_array(image[i]), axis=0))
    )
# Run every preprocessed poster through VGG16 (ImageNet weights, no top
# layers) and store the flattened output as one row of matrix_res.
# NOTE(review): 25088 is the hard-coded length of one flattened prediction,
# presumably 7 * 7 * 512 for a 224x224 input -- confirm if the input size
# or the model ever changes.
model = VGG16(weights='imagenet', include_top=False)
matrix_res = np.zeros((total_movies, 25088))
prediction = []
for i in range(total_movies):
    prediction.append(model.predict(x[i]).ravel())
    matrix_res[i, :] = prediction[i]
# Cosine similarity between all pairs of rows of matrix_res: take the pairwise
# dot products, then divide entry (i, j) by the norms of row i and row j.
# The squared norms are read off the diagonal of the dot-product matrix.
# A row whose norm is zero yields NaN entries.
similarity_deep = np.dot(matrix_res, matrix_res.T)
norms = np.sqrt(np.diagonal(similarity_deep))[np.newaxis, :]
similarity_deep = (similarity_deep / norms) / norms.T
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment