Skip to content

Instantly share code, notes, and snippets.

@PomoML
Created May 10, 2018 21:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PomoML/f940ae18237552ce419293a9b774f23a to your computer and use it in GitHub Desktop.
Save PomoML/f940ae18237552ce419293a9b774f23a to your computer and use it in GitHub Desktop.
lesson5-movielens-NN Only.ipynb, with bug
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Movielens"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline\n",
"\n",
"from fastai.learner import *\n",
"from fastai.column_data import *"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"Data available from http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"path='data/ml-latest-small/'"
]
},
{
"cell_type": "markdown",
"metadata": {
"hidden": true
},
"source": [
"We're working with the movielens data, which contains one rating per row, like this:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"ratings = pd.read_csv(path+'ratings.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {
"heading_collapsed": true
},
"source": [
"## Collaborative filtering"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"val_idxs = get_cv_idxs(len(ratings))\n",
"n_factors = 50"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"#cf = CollabFilterDataset.from_csv(path, 'ratings.csv', 'userId', 'movieId', 'rating')\n",
"#learn = cf.get_learner(n_factors, val_idxs, 64, opt_fn=optim.Adam)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"u_uniq = ratings.userId.unique()\n",
"user2idx = {o:i for i,o in enumerate(u_uniq)}\n",
"ratings.userId = ratings.userId.apply(lambda x: user2idx[x])\n",
"\n",
"m_uniq = ratings.movieId.unique()\n",
"movie2idx = {o:i for i,o in enumerate(m_uniq)}\n",
"ratings.movieId = ratings.movieId.apply(lambda x: movie2idx[x])\n",
"\n",
"n_users=int(ratings.userId.nunique())\n",
"n_movies=int(ratings.movieId.nunique())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"x = ratings.drop(['rating', 'timestamp'],axis=1)\n",
"y = ratings['rating'].astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"hidden": true
},
"outputs": [],
"source": [
"data = ColumnarModelData.from_data_frame(path, val_idxs, x, y, ['userId', 'movieId'], 64)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"hidden": true
},
"outputs": [
{
"data": {
"text/plain": [
"(0.5, 5.0)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"min_rating,max_rating = ratings.rating.min().item(),ratings.rating.max().item()\n",
"min_rating,max_rating"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#next(iter(data.trn_dl))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Mini net"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"code_folding": []
},
"outputs": [],
"source": [
"kValue = -1\n",
"class EmbeddingNet(nn.Module):\n",
" def __init__(self, n_users, n_movies, nh=10, p1=0.05, p2=0.5):\n",
" super().__init__()\n",
" (self.u, self.m) = [get_emb(*o) for o in [\n",
" (n_users, n_factors), (n_movies, n_factors)]]\n",
" self.lin1 = nn.Linear(n_factors*2, nh)\n",
" self.lin2 = nn.Linear(nh, 1)\n",
" self.drop1 = nn.Dropout(p1)\n",
" self.drop2 = nn.Dropout(p2)\n",
" self.kv = nn.Parameter(torch.FloatTensor(1).fill_(.5), requires_grad=True).cuda() #set k initial value\n",
" \n",
" def sigscale1(self,y,k):\n",
" return F.sigmoid(y) * (max_rating-min_rating+2*k) + min_rating-k\n",
" \n",
" def forward(self, cats, conts):\n",
" global kValue\n",
" users,movies = cats[:,0],cats[:,1]\n",
" x = self.drop1(torch.cat([self.u(users),self.m(movies)], dim=1))\n",
" x = self.drop2(F.relu(self.lin1(x)))\n",
" kValue = self.kv.data.cpu()[0]\n",
" return self.sigscale1(self.lin2(x),self.kv)\n",
" #return F.sigmoid(self.lin2(x)) * (max_rating-min_rating+1) + min_rating-0.5 #scrunches 0 to 5.5 but no symmetry\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"wd=1e-5\n",
"model = EmbeddingNet(n_users, n_movies).cuda()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"#Save initial state once, and restore here to make consistent comparisons.\n",
"if False:\n",
" torch.cuda.manual_seed_all(11111)\n",
" torch.manual_seed(11111)\n",
" model.load_state_dict(torch.load(\"MLModel.pt\"))\n",
"else:\n",
" torch.save(model.state_dict(), \"MLModel.pt\")\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2d3e3ae8e0fb4f3e931f74dbc830b970",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=12), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss \n",
" 0 0.858251 0.812676 \n",
" 1 0.843336 0.79294 \n",
" 2 0.778295 0.784312 \n",
" 3 0.76255 0.787983 \n",
" 4 0.758702 0.791481 \n",
" 5 0.752285 0.78832 \n",
" 6 0.745778 0.791469 \n",
" 7 0.729907 0.794116 \n",
" 8 0.760971 0.790681 \n",
" 9 0.747286 0.79349 \n",
" 10 0.716794 0.792652 \n",
" 11 0.736462 0.794784 \n",
"\n"
]
},
{
"data": {
"text/plain": [
"[array([0.79478])]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"opt = optim.Adam(model.parameters(), 1e-3, weight_decay=wd)\n",
"\n",
"fit(model, data, 12, opt, F.mse_loss)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.5"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kValue"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"set_lrs(opt, 1e-3)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e90674bd1f5e47c2965bb9064dd6d097",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=3), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss \n",
" 0 0.717925 0.792505 \n",
" 1 0.745821 0.793137 \n",
" 2 0.712673 0.7943 \n",
"\n"
]
},
{
"data": {
"text/plain": [
"[array([0.7943])]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fit(model, data, 3, opt, F.mse_loss)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "123px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment