Skip to content

Instantly share code, notes, and snippets.

@morganmcg1
Created December 21, 2020 22:37
Show Gist options
  • Save morganmcg1/b59087db0edea2ad5e6774d20de3d6d3 to your computer and use it in GitHub Desktop.
Save morganmcg1/b59087db0edea2ad5e6774d20de3d6d3 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mon Dec 21 22:32:59 2020 \n",
"+-----------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 450.66 Driver Version: 450.66 CUDA Version: 11.0 |\n",
"|-------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|===============================+======================+======================|\n",
"| 0 GeForce RTX 208... Off | 00000000:65:00.0 Off | N/A |\n",
"| 31% 38C P8 12W / 250W | 28MiB / 11019MiB | 0% Default |\n",
"| | | N/A |\n",
"+-------------------------------+----------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=============================================================================|\n",
"| 0 N/A N/A 1263 G /usr/lib/xorg/Xorg 9MiB |\n",
"| 0 N/A N/A 1318 G /usr/bin/gnome-shell 14MiB |\n",
"+-----------------------------------------------------------------------------+\n"
]
}
],
"source": [
"!nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmorgan\u001b[0m (use `wandb login --relogin` to force relogin)\n"
]
}
],
"source": [
"import sys\n",
"# Only install dependencies when running on Google Colab; a local environment is expected to have them already\n",
"if 'google.colab' in sys.modules:\n",
"    !pip install -qq einops axial-positional-embedding fastai datasets\n",
"    # GitHub no longer serves the unauthenticated git:// protocol, so install over HTTPS\n",
"    !pip install -qq git+https://github.com/arampacha/reformer_fastai.git\n",
"    !pip install -qqq wandb\n",
"# Authenticate with Weights & Biases (uses cached credentials when already logged in)\n",
"!wandb login"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from fastai.basics import *\n",
"from fastai.text.all import *\n",
"from reformer_fastai.tokenizers import SubwordTextEncoder"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load WMT Data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>de</th>\n",
" <th>en</th>\n",
" <th>is_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Buchen Sie ein Ferienhaus oder eine Ferienwohnung in Mauritius Nordküste, Mauritius Ostküste, &amp; Mauritius Westküste &amp; Inselmitte direkt beim Vermieter.</td>\n",
" <td>Why stay in a hotel when you can have a fully serviced Mauritius villa rental on Mauritius. Just contact us by the booking form or directly by telephone and we can start the process of finding the perfect holiday villa.... Find any special requirements that you have for your holiday, the Mauritius villa rental you want, trips and excursions.</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>In diesem Zusammenhang und vor dem Hintergrund der Schwachstellen, die bei dem Versuch, die griechische Initiative voranzubringen, deutlich geworden sind, ist der Hinweis wichtig, dass die Kompetenzen von Europol im Januar 2002 ausgeweitet worden sind und nun auch den illegalen Handel mit menschlichen Organen und Geweben einschließen.</td>\n",
" <td>In this context, and in view of the weaknesses detected when trying to move forward the Greek initiative, it is important to mention that Europol's competences were extended in January 2002 to include illicit trade in human organs and tissues.</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" de \\\n",
"0 Buchen Sie ein Ferienhaus oder eine Ferienwohnung in Mauritius Nordküste, Mauritius Ostküste, & Mauritius Westküste & Inselmitte direkt beim Vermieter. \n",
"1 In diesem Zusammenhang und vor dem Hintergrund der Schwachstellen, die bei dem Versuch, die griechische Initiative voranzubringen, deutlich geworden sind, ist der Hinweis wichtig, dass die Kompetenzen von Europol im Januar 2002 ausgeweitet worden sind und nun auch den illegalen Handel mit menschlichen Organen und Geweben einschließen. \n",
"\n",
" en \\\n",
"0 Why stay in a hotel when you can have a fully serviced Mauritius villa rental on Mauritius. Just contact us by the booking form or directly by telephone and we can start the process of finding the perfect holiday villa.... Find any special requirements that you have for your holiday, the Mauritius villa rental you want, trips and excursions. \n",
"1 In this context, and in view of the weaknesses detected when trying to move forward the Greek initiative, it is important to mention that Europol's competences were extended in January 2002 to include illicit trade in human organs and tissues. \n",
"\n",
" is_test \n",
"0 0 \n",
"1 0 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tiny_df=pd.read_feather('WMT14_TINY')\n",
"tiny_df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"## Load full WMT14 train dataset from HuggingFace datasets\n",
"#!pip install -qq datasets\n",
"#from datasets import load_dataset\n",
"#train_dataset = load_dataset('wmt_t2t')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df = tiny_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataloaders"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Get train/test split for TINY_WMT14\n",
"train_split = tiny_df.loc[tiny_df.is_test==0].index.values\n",
"test_split = tiny_df.loc[tiny_df.is_test==1].index.values\n",
"\n",
"# Get Vocab for tokenizer\n",
"# !wget -q https://raw.githubusercontent.com/tensorflow/tensor2tensor/master/tensor2tensor/test_data/vocab.translate_ende_wmt32k.32768.subwords\n",
"\n",
"# Set up Sub-Word tokenizer with vocab\n",
"tok = SubwordTextEncoder(filename='./vocab.translate_ende_wmt32k.32768.subwords', add_bos=True, seq_len=256)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LMTensorText([ 0, 4308, 105, 16, 49, 954, 11888, 33707, 5, 26494,\n",
" 16501, 5])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tok('hey is this working? <EOS>')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# def add_eos(text):\n",
"# return text + tok.EOS\n",
"\n",
"def add_eos_id(ids, keep_size=True):\n",
"    \"\"\"Return `ids` with the tokenizer's EOS id appended as the final element.\n",
"\n",
"    With `keep_size=True` (the default) the last id of `ids` is discarded before\n",
"    appending, so the result has the same length as the input; otherwise the\n",
"    result is one element longer than the input.\n",
"    \"\"\"\n",
"    # NOTE(review): the trailing id is dropped unconditionally when keep_size is True,\n",
"    # even for sequences shorter than the tokenizer's seq_len -- confirm this is intended.\n",
"    eos = LMTensorText(tok.EOS_ID).unsqueeze(0)\n",
"    kept = ids[:-1] if keep_size else ids\n",
"    return torch.cat([kept, eos])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 139 ms, sys: 6.88 ms, total: 146 ms\n",
"Wall time: 145 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# (train indices, test indices) pair handed to `Datasets`\n",
"splits = (train_split, test_split)\n",
"\n",
"# Character length of each German text; lets SortedDL initialise faster\n",
"df['de_lens'] = df['de'].str.len()\n",
"\n",
"# One pipeline per language: read the column, apply `tok`, then `add_eos_id`\n",
"en_tfms, de_tfms = [[ColReader(lang), tok, add_eos_id] for lang in (\"en\", \"de\")]\n",
"\n",
"# Set up datasets\n",
"dsets = Datasets(df, [en_tfms, de_tfms], splits=splits)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.13 s, sys: 442 ms, total: 1.57 s\n",
"Wall time: 1.57 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"# Get dataloader\n",
"srtd_dl = partial(SortedDL, shuffle=True, res=df['de_lens'].values[splits[0]])\n",
"dl_kwargs = [{},{'val_res': df['de_lens'].values[splits[1]]}]\n",
"\n",
"# Define padding\n",
"pad_seq2seq = partial(pad_input, pad_idx=tok.PAD_ID, pad_fields=[0,1])\n",
"\n",
"# Set up dataloaders\n",
"dls = dsets.dataloaders(bs=16, before_batch=pad_seq2seq, dl_type = srtd_dl, dl_kwargs = dl_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>text_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>&lt;pad&gt;* @constructor * @param {GMarker} marker * @param {String} text * @param {Number} padding */ function Tooltip(marker, text, padding){ this.marker_ = marker; this.text_ = text; this.padding_ = padding; } Tooltip.prototype = new GOverlay(); Tooltip.prototype.initialize = function(map){ var div = document.createElement(\"div\"); div.appendChild(document.createTextNode(this.text_)); div.className = 'tooltip'; div.style.position = 'absolute'; div.style.visibility = 'hidden'; div.style.backgroundColor = '#FFFFFF'; div.style.fontWeight = 'bold'; div.style.width = '200px'; div.style.height = '22px'&lt;EOS&gt;</td>\n",
" <td>&lt;pad&gt;* @constructor * @param {GMarker} marker * @param {String} text * @param {Number} padding */ function Tooltip(marker, text, padding){ this.marker_ = marker; this.text_ = text; this.padding_ = padding; } Tooltip.prototype = new GOverlay(); Tooltip.prototype.initialize = function(map){ var div = document.createElement(\"div\"); div.appendChild(document.createTextNode(this.text_)); div.className = 'tooltip'; div.style.position = 'absolute'; div.style.visibility = 'hidden'; div.style.backgroundColor = '#FFFFFF'; div.style.fontWeight = 'bold'; div.style.width = '200px'; div.style.height = '22px'&lt;EOS&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>&lt;pad&gt;Thereis, however, one point to which allusion has already been made and which it is important to underline from the outset. We have an internal market concept, we have the Lisbon and Gothenburg strategies, we have European competition policy, we have the Financial Services Action Plan and the Risk Capital Action Plan, we have Article 2, which obliges Member States to pursue a course of economic policy coordination, we have the Stability and Growth Pact, we have the euro, we have EU enlargement and hence the expansion of the internal market into a home market, and we have the eco-social market economy as our model of economic governance, which means competitiveness within a free market, promoting social cohesion while being mindful of its responsibility towards nature and people&lt;EOS&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;</td>\n",
" <td>&lt;pad&gt;Esist aber am Beginn zu betonen - es ist auch bereits angeschnitten worden -, wir haben ein Binnenmarktkonzept, wir haben die Lissabon- und Göteborg-Strategie, wir haben die europäische Wettbewerbspolitik, wir haben den Aktionsplan für Finanzdienstleistungen, den Aktionsplan für Risikokapital, wir haben den Artikel 2, der die Mitgliedstaaten zur Koordination verpflichtet, wir haben den Stabilitäts- und Wachstumspakt, wir haben den Euro, wir haben die Erweiterung der Europäischen Union und damit die Erweiterung des Binnenmarktes zum Heimatmarkt, wir haben das Ordnungsmodell der ökosozialen Marktwirtschaft, was Wettbewerbsfähigkeit in einem freien Markt bedeutet, der den sozialen Zusammenhalt fördert und sich seiner Verantwortung für Natur und Mensch bewusst ist&lt;EOS&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt
;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;&lt;pad&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dls.show_batch(max_n=2)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((16, 256), (16, 256))"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"o = dls.one_batch()\n",
"o[0].size(), o[1].size()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# !wget https://github.com/haws74516/en_ga_ds/raw/main/en_ga.zip\n",
"# "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# import zipfile\n",
"# path = Path()\n",
"# with zipfile.ZipFile('en_ga.zip', 'r') as f:\n",
"# f.extractall(path)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# path = Path('.')\n",
"\n",
"# df = pd.read_csv(path/'en_ga.csv', index_col=0)\n",
"# df.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# %%time\n",
"# def add_eos(text):\n",
"# return f'{BOS} ' + text + f' {EOS}'\n",
"\n",
"# dblock = DataBlock(blocks=(TextBlock.from_df('en', tok_text_col='en', rules=[add_eos]),\n",
"# TextBlock.from_df('ga', tok_text_col='ga', rules=[add_eos])),\n",
"# get_x=ColReader('en'),\n",
"# get_y=ColReader('ga'), \n",
"# splitter=RandomSplitter())\n",
"\n",
"# dsets = dblock.datasets(df)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# %%time\n",
"# pad_seq2seq = partial(pad_input, pad_fields=[0,1])\n",
"\n",
"# dl_kwargs = [{'res':df['en_len'].values[dsets.splits[0]]},\n",
"# {'val_res':df['en_len'].values[dsets.splits[1]]}]\n",
"\n",
"# dls = dsets.dataloaders(bs=8, dl_type=SortedDL, before_batch=pad_seq2seq, shuffle_train=True,\n",
"# num_workers=2, dl_kwargs=dl_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# dls.show_batch(max_n=4)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# o = dls.one_batch()\n",
"# o[0].size(), o[0][0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Learner"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# use shared vocab\n",
"enc_vocab_sz=dec_vocab_sz=tok.vocab_size\n",
"#enc_vocab_sz=dec_vocab_sz=30000\n",
"# model dim\n",
"d_model = 768"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# enc_vocab_sz = len(dls.vocab[0])\n",
"# # dec_vocab_sz = len(dls.vocab[1])\n",
"# dec_vocab_sz=enc_vocab_sz"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"from reformer_fastai.transformer import TransformerEncDec\n",
"from reformer_fastai.core import CombineInputOutputCallback, LossTargetShiftCallback, RemoveEOSCallback\n",
"from reformer_fastai.optimizers import adafactor"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"import pdb"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# dls.cpu(), dls.device"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(125104044, True)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cbs = [CombineInputOutputCallback(), LossTargetShiftCallback(), RemoveEOSCallback(eos_idx=tok.EOS_ID)]\n",
"# cbs = [CombineInputOutputCallback(), LossTargetShiftCallback()] #, RemoveEOSCallback(eos_idx=tok.EOS_ID)]\n",
"\n",
"learn = Learner(dls, TransformerEncDec(enc_vocab_sz, dec_vocab_sz, d_model=d_model, heads=8, #n_enc_layers=2, n_dec_layers=1, \n",
" max_seq_len=256, pad_idx=tok.PAD_ID, tie_weights=True, shared_emb=True,\n",
" attn_dropout=0.0, ff_dropout=0.0, emb_dropout=0.0,\n",
" pos_enc='fixed'),\n",
" loss_func=CrossEntropyLossFlat(ignore_index=tok.PAD_ID), cbs=cbs, # opt_func=adafactor,\n",
" metrics=[accuracy, Perplexity(), CorpusBLEUMetric()]).to_native_fp16()\n",
"\n",
"total_params(learn.model)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"#os.environ['CUDA_LAUNCH_BLOCKING'] = '1'"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"SuggestedLRs(lr_min=0.02089296132326126, lr_steep=0.17378008365631104)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"learn.lr_find()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmorgan\u001b[0m (use `wandb login --relogin` to force relogin)\n"
]
},
{
"data": {
"text/html": [
"\n",
" Tracking run with wandb version 0.10.12<br/>\n",
" Syncing run <strong style=\"color:#cdcd00\">triple_sharing_wmt_tiny</strong> to <a href=\"https://wandb.ai\" target=\"_blank\">Weights & Biases</a> <a href=\"https://docs.wandb.com/integrations/jupyter.html\" target=\"_blank\">(Documentation)</a>.<br/>\n",
" Project page: <a href=\"https://wandb.ai/fastai_community/reformer-fastai\" target=\"_blank\">https://wandb.ai/fastai_community/reformer-fastai</a><br/>\n",
" Run page: <a href=\"https://wandb.ai/fastai_community/reformer-fastai/runs/14eier3y\" target=\"_blank\">https://wandb.ai/fastai_community/reformer-fastai/runs/14eier3y</a><br/>\n",
" Run data is saved locally in <code>/home/morgan/ml/projects/reformer_fastai/nbs/exploration/wandb/run-20201221_223327-14eier3y</code><br/><br/>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<h1>Run(14eier3y)</h1><p></p><iframe src=\"https://wandb.ai/fastai_community/reformer-fastai/runs/14eier3y\" style=\"border:none;width:100%;height:400px\"></iframe>"
],
"text/plain": [
"<wandb.sdk.wandb_run.Run at 0x7f3ce4a8ed50>"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import wandb\n",
"from fastai.callback.wandb import *\n",
"\n",
"# Run metadata shown in the Weights & Biases UI\n",
"WANDB_NAME = 'triple_sharing_wmt_tiny'\n",
"GROUP = 'TEST'\n",
"NOTES = 'Triple weight sharing with the WMT_TINY dataset, fixed positional embeddings'\n",
"CONFIG = {}  # extra hyperparameters to record with the run (currently none)\n",
"TAGS = ['enc-dec','test','wmt14_tiny']\n",
"\n",
"# reinit=True allows starting a new run from the same kernel session\n",
"wandb.init(reinit=True, project=\"reformer-fastai\", entity=\"fastai_community\",\n",
"           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
" </style>\n",
" <progress value='0' class='' max='3' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 0.00% [0/3 00:00<00:00]\n",
" </div>\n",
" \n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>epoch</th>\n",
" <th>train_loss</th>\n",
" <th>valid_loss</th>\n",
" <th>accuracy</th>\n",
" <th>perplexity</th>\n",
" <th>corpus_bleu</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table><p>\n",
"\n",
" <div>\n",
" <style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
" </style>\n",
" <progress value='1620' class='' max='2870' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 56.45% [1620/2870 02:57<02:16 6.9144]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn.fit_one_cycle(3, 1e-4, div=5, cbs=WandbCallback(log_preds=False, log_model=False))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# # exports\n",
"# class myRemoveEOSCallback(Callback):\n",
"# \"\"\"\n",
"# Shift the target presented to the model during training to remove the \"eos\" token as \n",
"# we don't want the model to learn to translate EOS when it sees EOS.\n",
" \n",
"# In practice we actually mask the EOS token as due to batching the last token will often be a <pad> token,\n",
"# not EOS\n",
"# \"\"\"\n",
"# def __init__(self, eos_idx): self.eos_idx=eos_idx\n",
"# def before_batch(self): \n",
"# eos_mask=(self.learn.xb[1]!=self.eos_idx)\n",
"# sz=torch.tensor(self.learn.xb[1].size())\n",
"# # If ids contain eos token ids, do masking\n",
"# if eos_mask.sum() < sz[0]*sz[1]: \n",
"# sz[1]=sz[1]-1\n",
"# self.learn.xb = (self.learn.xb[0], self.learn.xb[1][eos_mask].view((sz[0],sz[1])))\n",
"# return\n",
"# else: return"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(125104044, True)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# cbs = [CombineInputOutputCallback(), LossTargetShiftCallback(), RemoveEOSCallback(eos_idx=tok.EOS_ID)]\n",
"# # cbs = [CombineInputOutputCallback(), LossTargetShiftCallback()] #, RemoveEOSCallback(eos_idx=tok.EOS_ID)]\n",
"\n",
"# learn = Learner(dls, TransformerEncDec(enc_vocab_sz, dec_vocab_sz, d_model=d_model, heads=8, \n",
"# max_seq_len=256, pad_idx=tok.PAD_ID, tie_weights=True, shared_emb=True,\n",
"# attn_dropout=0.0, ff_dropout=0.0, emb_dropout=0.0,\n",
"# pos_enc='fixed'),\n",
"# loss_func=CrossEntropyLossFlat(ignore_index=tok.PAD_ID), opt_func=adafactor, cbs=cbs,\n",
"# metrics=[accuracy, Perplexity(), CorpusBLEUMetric()]).to_native_fp16()\n",
"\n",
"# total_params(learn.model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment