Skip to content

Instantly share code, notes, and snippets.

Avatar

Dario Radečić dradecic

  • NEOS
  • Zagreb
View GitHub Profile
View linreg_gradient_descent.py
b0, b1 = 0.0, 1.0
lr = 0.001
epochs = 10000
error = []
# run one pass per epoch (`epochs` iterations in total)
for epoch in range(epochs):
# initialize to 0 -> cost of epoch, Jb_0, Jb_1
epoch_cost, cost_b0, cost_b1 = 0, 0, 0
View recommender1_8_scatter.py
# Per-title rating statistics: the mean rating and the number of ratings.
ratings_by_title = data.groupby('title')['rating']
ratings_df = pd.DataFrame({
    'Mean_Rating': ratings_by_title.mean().values,
    'Num_Ratings': ratings_by_title.count().values,
})

# Figure setup: hide the top and right frame lines, then label the axes.
fig, ax = plt.subplots(figsize=(14, 7))
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
ax.set_title('Rating vs. Number of Ratings', fontsize=24, pad=20)
ax.set_xlabel('Rating', fontsize=16, labelpad=20)
View recommender1_7_top10.py
# Ten most-rated movies: order rows by rating count (descending), keep the
# first row seen for each movieId, then take the first ten.
(
    data
    .sort_values(by='numRatings', ascending=False)
    .drop_duplicates('movieId')
    .head(10)
)
View recommender1_6_num_ratings.py
# Count the ratings each movie received ...
num_ratings = data.groupby('movieId')['rating'].count().to_frame().reset_index()
# ... and attach that count to every row of `data`. Both frames carry a
# 'rating' column, so the merge suffixes them with _x / _y; rename them back.
data = data.merge(num_ratings, on='movieId').rename(
    columns={'rating_x': 'rating', 'rating_y': 'numRatings'}
)
@dradecic
dradecic / recommender1_5_rating_by_genre.py
Created Sep 29, 2019
recommender1_5_rating_by_genre
View recommender1_5_rating_by_genre.py
values = defaultdict(list)
for ind, row in data.iterrows():
for genre in row['genres'].split('|'):
values[genre].append(row['rating'])
genre_lst, rating_lst = [], []
for key, item in values.items():
if key not in [0, 1]:
genre_lst.append(key)
View recommender1_bar_chart.py
def make_bar_chart(dataset, attribute, bar_color='#3498db', edge_color='#2980b9', title='Title', xlab='X', ylab='Y', sort_index=False):
if sort_index == False:
xs = dataset[attribute].value_counts().index
ys = dataset[attribute].value_counts().values
else:
xs = dataset[attribute].value_counts().sort_index().index
ys = dataset[attribute].value_counts().sort_index().values
fig, ax = plt.subplots(figsize=(14, 7))
View recommender1_4_genres.py
# Build a long-format movie/genre table: split each 'genres' string on '|',
# spread the pieces across columns of a frame indexed by movieId, then stack
# those columns into a single Series.
# NOTE(review): presumably stack() discards the padding created for movies
# with fewer genres than the widest row — confirm on the installed pandas.
genre_df = pd.DataFrame(data['genres'].str.split('|').tolist(), index=data['movieId']).stack()
# Move the movieId index level back into a regular column.
# NOTE(review): `0` and 'movieId' appear to name the same (outer) index level;
# confirm the duplicate is intentional and behaves the same across pandas versions.
genre_df = genre_df.reset_index([0, 'movieId'])
# Name the two resulting columns.
genre_df.columns = ['movieId', 'Genre']
View recommender1_3_histogram.py
def make_histogram(dataset, attribute, bins=25, bar_color='#3498db', edge_color='#2980b9', title='Title', xlab='X', ylab='Y', sort_index=False):
if attribute == 'moviePubYear':
dataset = dataset[dataset['moviePubYear'] != 9999]
fig, ax = plt.subplots(figsize=(14, 7))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_title(title, fontsize=24, pad=20)
ax.set_xlabel(xlab, fontsize=16, labelpad=20)
ax.set_ylabel(ylab, fontsize=16, labelpad=20)
@dradecic
dradecic / recommender_1_obtaining_years.py
Created Sep 29, 2019
recommender_1_obtaining_years
View recommender_1_obtaining_years.py
def _parse_pub_year(title, missing=9999):
    """Return the year read from the tail of *title*, or *missing*.

    The year is taken from the four characters that precede the final
    character of the title (presumably titles end in "(YYYY)" — verify
    against the dataset). Anything that cannot be read as an integer
    yields *missing* instead of raising.
    """
    try:
        return int(title[-5:-1])
    except (TypeError, ValueError):
        # TypeError: the title is not sliceable text (e.g. a missing value).
        # ValueError: the sliced characters are not a number.
        # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit
        # and genuine programming errors propagate.
        return missing


# 9999 is the sentinel for "no parseable year".
years = [_parse_pub_year(title) for title in data['title']]
data['moviePubYear'] = years
# Report how many titles ended up with the sentinel year.
print(len(data[data['moviePubYear'] == 9999]))
View recommender_1_imports.py
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
# Load the two input tables from the local data/ directory.
movies_csv, ratings_csv = 'data/movies.csv', 'data/ratings.csv'
movies = pd.read_csv(movies_csv)
ratings = pd.read_csv(ratings_csv)