Created
June 1, 2019 13:38
-
-
Save mjacar/0c9809b96513daff84fe3d9938f08638 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%reload_ext autoreload\n", | |
"%autoreload 2\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fastai.text import *\n", | |
"import random\n", | |
"import numpy as np\n", | |
"seed = 42" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Data Retrieval" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bs=64" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"path = untar_data(URLs.IMDB)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build the language-model DataBunch from the review text files under `path`.\n", | |
"data_lm = (TextList.from_folder(path)\n", | |
"           # Inputs: all the text files in path\n", | |
"           .filter_by_folder(include=['train', 'test', 'unsup'])\n", | |
"           # Other temp folders may contain text files, so keep only train, test and unsup\n", | |
"           .split_by_rand_pct(0.1, seed=seed)\n", | |
"           # Randomly hold out 10% (10,000 reviews) for validation; seeded so the split is repeatable\n", | |
"           .label_for_lm()\n", | |
"           # We are training a language model, so label accordingly\n", | |
"           .databunch(bs=bs))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Model Initialization" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"learn.save('initial_model')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# FastAI Implementation\n", | |
"\n", | |
"Note that the FastAI library uses the AdamW optimizer by default." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Seed every random number generator used below with the same value.\n", | |
"os.environ['PYTHONHASHSEED'] = str(seed)\n", | |
"random.seed(seed)                 # Python random module\n", | |
"np.random.seed(seed)              # NumPy module\n", | |
"torch.manual_seed(seed)\n", | |
"torch.cuda.manual_seed(seed)\n", | |
"torch.cuda.manual_seed_all(seed)  # needed when using multiple GPUs\n", | |
"\n", | |
"# Turn off cuDNN benchmark mode and request deterministic algorithms.\n", | |
"torch.backends.cudnn.deterministic = True\n", | |
"torch.backends.cudnn.benchmark = False\n", | |
"\n", | |
"\n", | |
"def _init_fn(worker_id):\n", | |
"    '''DataLoader worker hook: seed NumPy with the notebook seed (worker_id is ignored).'''\n", | |
"    np.random.seed(int(seed))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"model = learn.model\n", | |
"model.train()\n", | |
"\n", | |
"# Plain PyTorch loaders over fastai's datasets, with shuffling switched off.\n", | |
"# worker_init_fn is handed to the constructor rather than patched on afterwards.\n", | |
"loader_kwargs = dict(batch_size=64, shuffle=False, num_workers=1,\n", | |
"                     pin_memory=True, worker_init_fn=_init_fn)\n", | |
"\n", | |
"learn.data.train_dl.dataset.shuffle = False\n", | |
"ds = learn.data.train_dl.dl.dataset\n", | |
"train_dl = torch.utils.data.DataLoader(ds, **loader_kwargs)\n", | |
"\n", | |
"learn.data.valid_dl.dataset.shuffle = False\n", | |
"ds = learn.data.valid_dl.dl.dataset\n", | |
"valid_dl = torch.utils.data.DataLoader(ds, **loader_kwargs)\n", | |
"\n", | |
"# Reuse the optimizer already attached to the learner, with explicit lr and weight decay.\n", | |
"opt = learn.opt\n", | |
"opt.lr = 1e-3\n", | |
"opt.wd = 1e-2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import torch.nn.functional as F\n", | |
"\n", | |
"\n", | |
"def loss_and_accuracy(model, xb, yb):\n", | |
"    '''Return (cross-entropy loss, accuracy) of `model` on one batch.\n", | |
"\n", | |
"    The first element of the model's output is used as the logits; their two\n", | |
"    leading dimensions are flattened together before the loss is computed.\n", | |
"    '''\n", | |
"    logits = model(xb)[0]\n", | |
"    n_rows, n_cols = logits.size()[:2]\n", | |
"    flat_logits = logits.view([n_rows * n_cols, -1])\n", | |
"    loss = F.cross_entropy(flat_logits, yb.view(-1))\n", | |
"\n", | |
"    # Accuracy: arg-max prediction against the target, one row per batch item.\n", | |
"    batch_len = yb.shape[0]\n", | |
"    predicted = logits.argmax(dim=-1).view(batch_len, -1)\n", | |
"    expected = yb.view(batch_len, -1)\n", | |
"    accuracy = (predicted == expected).float().mean()\n", | |
"\n", | |
"    return loss, accuracy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One pass over the training batches: forward, backward, optimizer step, clear gradients.\n", | |
"for xb, yb in train_dl:\n", | |
"    batch_loss, _ = loss_and_accuracy(model, xb.cuda(), yb.cuda())\n", | |
"    batch_loss.backward()\n", | |
"    opt.step()\n", | |
"    opt.zero_grad()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Valid accuracy: 0.2949907813791357\n", | |
"Valid loss: 4.021796843980787\n" | |
] | |
} | |
], | |
"source": [ | |
"model.eval()\n", | |
"f_losses = []\n", | |
"f_accuracies = []\n", | |
"batch_sizes = []\n", | |
"\n", | |
"# Evaluation needs no gradients, so autograd is switched off for the whole pass.\n", | |
"with torch.no_grad():\n", | |
"    for xb, yb in valid_dl:\n", | |
"        l, acc = loss_and_accuracy(model, xb.cuda(), yb.cuda())\n", | |
"        f_losses.append(l.item())\n", | |
"        f_accuracies.append(acc.item())\n", | |
"        # Record the batch length without overwriting the notebook-level `bs`.\n", | |
"        batch_sizes.append(yb.shape[0])\n", | |
"\n", | |
"# Per-batch means are weighted by batch length to get averages over the whole set.\n", | |
"n_items = float(sum(batch_sizes))\n", | |
"valid_accuracy = sum(a * n for a, n in zip(f_accuracies, batch_sizes)) / n_items\n", | |
"valid_loss = sum(v * n for v, n in zip(f_losses, batch_sizes)) / n_items\n", | |
"print(\"Valid accuracy: {}\".format(valid_accuracy))\n", | |
"print(\"Valid loss: {}\".format(valid_loss))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# PyTorch Implementation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Re-seed every random number generator with the same value before the second run.\n", | |
"os.environ['PYTHONHASHSEED'] = str(seed)\n", | |
"random.seed(seed)                 # Python random module\n", | |
"np.random.seed(seed)              # NumPy module\n", | |
"torch.manual_seed(seed)\n", | |
"torch.cuda.manual_seed(seed)\n", | |
"torch.cuda.manual_seed_all(seed)  # needed when using multiple GPUs\n", | |
"\n", | |
"# Turn off cuDNN benchmark mode and request deterministic algorithms.\n", | |
"torch.backends.cudnn.deterministic = True\n", | |
"torch.backends.cudnn.benchmark = False\n", | |
"\n", | |
"\n", | |
"def _init_fn(worker_id):\n", | |
"    '''DataLoader worker hook: seed NumPy with the notebook seed (worker_id is ignored).'''\n", | |
"    np.random.seed(int(seed))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from torch.optim import AdamW\n", | |
"\n", | |
"torch.manual_seed(seed)\n", | |
"# Reload the checkpoint saved before the first run so both runs start from the same weights.\n", | |
"learn.load('initial_model')\n", | |
"model = learn.model\n", | |
"model.train()\n", | |
"# NOTE(review): the AWD-LSTM appears to keep its hidden state between batches and outside\n", | |
"# the checkpoint, so clear it here to start like the first run did - confirm.\n", | |
"model.reset()\n", | |
"\n", | |
"# lr and weight_decay are spelled out to mirror opt.lr / opt.wd set on the fastai optimizer.\n", | |
"# NOTE(review): confirm that fastai's optimizer uses the same betas/eps as torch's AdamW.\n", | |
"opt = AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)\n", | |
"\n", | |
"# Plain PyTorch loaders over fastai's datasets, with shuffling switched off.\n", | |
"# worker_init_fn is handed to the constructor rather than patched on afterwards.\n", | |
"loader_kwargs = dict(batch_size=64, shuffle=False, num_workers=1,\n", | |
"                     pin_memory=True, worker_init_fn=_init_fn)\n", | |
"\n", | |
"learn.data.train_dl.dataset.shuffle = False\n", | |
"ds = learn.data.train_dl.dl.dataset\n", | |
"train_dl = torch.utils.data.DataLoader(ds, **loader_kwargs)\n", | |
"\n", | |
"learn.data.valid_dl.dataset.shuffle = False\n", | |
"ds = learn.data.valid_dl.dl.dataset\n", | |
"valid_dl = torch.utils.data.DataLoader(ds, **loader_kwargs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One pass over the training batches: forward, backward, optimizer step, clear gradients.\n", | |
"for xb, yb in train_dl:\n", | |
"    batch_loss, _ = loss_and_accuracy(model, xb.cuda(), yb.cuda())\n", | |
"    batch_loss.backward()\n", | |
"    opt.step()\n", | |
"    opt.zero_grad()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Valid accuracy: 0.2950007311036647\n", | |
"Valid loss: 4.0218084234008105\n" | |
] | |
} | |
], | |
"source": [ | |
"model.eval()\n", | |
"p_losses = []\n", | |
"p_accuracies = []\n", | |
"batch_sizes = []\n", | |
"\n", | |
"# Evaluation needs no gradients, so autograd is switched off for the whole pass.\n", | |
"with torch.no_grad():\n", | |
"    for xb, yb in valid_dl:\n", | |
"        l, acc = loss_and_accuracy(model, xb.cuda(), yb.cuda())\n", | |
"        p_losses.append(l.item())\n", | |
"        p_accuracies.append(acc.item())\n", | |
"        # Record the batch length without overwriting the notebook-level `bs`.\n", | |
"        batch_sizes.append(yb.shape[0])\n", | |
"\n", | |
"# Per-batch means are weighted by batch length to get averages over the whole set.\n", | |
"n_items = float(sum(batch_sizes))\n", | |
"valid_accuracy = sum(a * n for a, n in zip(p_accuracies, batch_sizes)) / n_items\n", | |
"valid_loss = sum(v * n for v, n in zip(p_losses, batch_sizes)) / n_items\n", | |
"print(\"Valid accuracy: {}\".format(valid_accuracy))\n", | |
"print(\"Valid loss: {}\".format(valid_loss))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Environment (conda_pytorch_p36)", | |
"language": "python", | |
"name": "conda_pytorch_p36" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment