Created
May 10, 2018 21:02
-
-
Save PomoML/f940ae18237552ce419293a9b774f23a to your computer and use it in GitHub Desktop.
lesson5-movielens-NN Only.ipynb, with bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"heading_collapsed": true | |
}, | |
"source": [ | |
"## Movielens" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%reload_ext autoreload\n", | |
"%autoreload 2\n", | |
"%matplotlib inline\n", | |
"\n", | |
"from fastai.learner import *\n", | |
"from fastai.column_data import *" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"hidden": true | |
}, | |
"source": [ | |
"Data available from http://files.grouplens.org/datasets/movielens/ml-latest-small.zip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"path='data/ml-latest-small/'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"hidden": true | |
}, | |
"source": [ | |
"We're working with the movielens data, which contains one rating per row, like this:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"ratings = pd.read_csv(path+'ratings.csv')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"heading_collapsed": true | |
}, | |
"source": [ | |
"## Collaborative filtering" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"val_idxs = get_cv_idxs(len(ratings))\n", | |
"n_factors = 50" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#cf = CollabFilterDataset.from_csv(path, 'ratings.csv', 'userId', 'movieId', 'rating')\n", | |
"#learn = cf.get_learner(n_factors, val_idxs, 64, opt_fn=optim.Adam)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"u_uniq = ratings.userId.unique()\n", | |
"user2idx = {o:i for i,o in enumerate(u_uniq)}\n", | |
"ratings.userId = ratings.userId.apply(lambda x: user2idx[x])\n", | |
"\n", | |
"m_uniq = ratings.movieId.unique()\n", | |
"movie2idx = {o:i for i,o in enumerate(m_uniq)}\n", | |
"ratings.movieId = ratings.movieId.apply(lambda x: movie2idx[x])\n", | |
"\n", | |
"n_users=int(ratings.userId.nunique())\n", | |
"n_movies=int(ratings.movieId.nunique())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"x = ratings.drop(['rating', 'timestamp'],axis=1)\n", | |
"y = ratings['rating'].astype(np.float32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data = ColumnarModelData.from_data_frame(path, val_idxs, x, y, ['userId', 'movieId'], 64)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"hidden": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(0.5, 5.0)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"min_rating,max_rating = ratings.rating.min().item(),ratings.rating.max().item()\n", | |
"min_rating,max_rating" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#next(iter(data.trn_dl))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Mini net" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"code_folding": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"kValue = -1\n", | |
"class EmbeddingNet(nn.Module):\n", | |
" def __init__(self, n_users, n_movies, nh=10, p1=0.05, p2=0.5):\n", | |
" super().__init__()\n", | |
" (self.u, self.m) = [get_emb(*o) for o in [\n", | |
" (n_users, n_factors), (n_movies, n_factors)]]\n", | |
" self.lin1 = nn.Linear(n_factors*2, nh)\n", | |
" self.lin2 = nn.Linear(nh, 1)\n", | |
" self.drop1 = nn.Dropout(p1)\n", | |
" self.drop2 = nn.Dropout(p2)\n", | |
" self.kv = nn.Parameter(torch.FloatTensor(1).fill_(.5), requires_grad=True).cuda() #set k initial value\n", | |
" \n", | |
" def sigscale1(self,y,k):\n", | |
" return F.sigmoid(y) * (max_rating-min_rating+2*k) + min_rating-k\n", | |
" \n", | |
" def forward(self, cats, conts):\n", | |
" global kValue\n", | |
" users,movies = cats[:,0],cats[:,1]\n", | |
" x = self.drop1(torch.cat([self.u(users),self.m(movies)], dim=1))\n", | |
" x = self.drop2(F.relu(self.lin1(x)))\n", | |
" kValue = self.kv.data.cpu()[0]\n", | |
" return self.sigscale1(self.lin2(x),self.kv)\n", | |
" #return F.sigmoid(self.lin2(x)) * (max_rating-min_rating+1) + min_rating-0.5 #scrunches 0 to 5.5 but no symmetry\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"wd=1e-5\n", | |
"model = EmbeddingNet(n_users, n_movies).cuda()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Save initial state once, and restore here to make consistent comparisons.\n", | |
"if False:\n", | |
" torch.cuda.manual_seed_all(11111)\n", | |
" torch.manual_seed(11111)\n", | |
" model.load_state_dict(torch.load(\"MLModel.pt\"))\n", | |
"else:\n", | |
" torch.save(model.state_dict(), \"MLModel.pt\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "2d3e3ae8e0fb4f3e931f74dbc830b970", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='Epoch', max=12), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"epoch trn_loss val_loss \n", | |
" 0 0.858251 0.812676 \n", | |
" 1 0.843336 0.79294 \n", | |
" 2 0.778295 0.784312 \n", | |
" 3 0.76255 0.787983 \n", | |
" 4 0.758702 0.791481 \n", | |
" 5 0.752285 0.78832 \n", | |
" 6 0.745778 0.791469 \n", | |
" 7 0.729907 0.794116 \n", | |
" 8 0.760971 0.790681 \n", | |
" 9 0.747286 0.79349 \n", | |
" 10 0.716794 0.792652 \n", | |
" 11 0.736462 0.794784 \n", | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[array([0.79478])]" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"opt = optim.Adam(model.parameters(), 1e-3, weight_decay=wd)\n", | |
"\n", | |
"fit(model, data, 12, opt, F.mse_loss)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.5" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"kValue" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"set_lrs(opt, 1e-3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "e90674bd1f5e47c2965bb9064dd6d097", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"HBox(children=(IntProgress(value=0, description='Epoch', max=3), HTML(value='')))" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"epoch trn_loss val_loss \n", | |
" 0 0.717925 0.792505 \n", | |
" 1 0.745821 0.793137 \n", | |
" 2 0.712673 0.7943 \n", | |
"\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[array([0.7943])]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"fit(model, data, 3, opt, F.mse_loss)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
}, | |
"toc": { | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"navigate_num": "#000000", | |
"navigate_text": "#333333", | |
"running_highlight": "#FF0000", | |
"selected_highlight": "#FFD700", | |
"sidebar_border": "#EEEEEE", | |
"wrapper_background": "#FFFFFF" | |
}, | |
"moveMenuLeft": true, | |
"nav_menu": { | |
"height": "123px", | |
"width": "252px" | |
}, | |
"navigate_menu": true, | |
"number_sections": true, | |
"sideBar": true, | |
"threshold": 4, | |
"toc_cell": false, | |
"toc_section_display": "block", | |
"toc_window_display": false, | |
"widenNotebook": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment