Skip to content

Instantly share code, notes, and snippets.

@bharadwaj6
Created May 25, 2018 21:28
Show Gist options
  • Save bharadwaj6/6af00c692d2a25c1df299c39632cb8ff to your computer and use it in GitHub Desktop.
Save bharadwaj6/6af00c692d2a25c1df299c39632cb8ff to your computer and use it in GitHub Desktop.
telugu2vec
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"%matplotlib inline\n",
"\n",
"import re\n",
"\n",
"import torchtext\n",
"from torchtext import vocab, data\n",
"from torchtext.datasets import language_modeling\n",
"\n",
"from fastai.learner import *\n",
"from fastai.rnn_reg import *\n",
"from fastai.rnn_train import *\n",
"from fastai.nlp import *\n",
"from fastai.lm_rnn import *\n",
"\n",
"\n",
"import dill as pickle\n",
"from IPython.display import Image\n",
"from IPython.core.display import HTML"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"PATH = 'data/teluguwiki/'\n",
"EXT_PATH = 'extract/'\n",
"TRN_PATH = 'train/'\n",
"VAL_PATH = 'valid/'\n",
"SAMPLE_PATH = 'sample/'\n",
"\n",
"EXT = f'{PATH}{EXT_PATH}'\n",
"TRN = f'{PATH}{TRN_PATH}'\n",
"VAL = f'{PATH}{VAL_PATH}'\n",
"SAMPLE = f'{PATH}{SAMPLE_PATH}'\n",
"\n",
"ext_files = !ls {EXT}\n",
"sample_files = !ls {SAMPLE}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import html\n",
"re1 = re.compile(r' +')\n",
"\n",
"def fixup(x):\n",
"    \"\"\"Clean one line of extracted text.\n",
"\n",
"    Applies a fixed, ordered list of literal substring replacements, runs\n",
"    html.unescape on the result, and finally replaces every match of re1\n",
"    with a single space.\n",
"    \"\"\"\n",
"    # Applied strictly in this order; each step sees the previous step's output.\n",
"    substitutions = (\n",
"        ('#39;', \"'\"),\n",
"        ('amp;', '&'),\n",
"        ('#146;', \"'\"),\n",
"        ('nbsp;', ' '),\n",
"        ('#36;', '$'),\n",
"        ('\\\\n', \"\\n\"),\n",
"        ('quot;', \"'\"),\n",
"        ('<br />', \"\\n\"),\n",
"        ('\\\\\"', '\"'),\n",
"        ('<unk>', 'u_n'),\n",
"        (' @.@ ', '.'),\n",
"        (' @-@ ', '-'),\n",
"        ('\\\\', ' \\\\ '),\n",
"    )\n",
"    for old, new in substitutions:\n",
"        x = x.replace(old, new)\n",
"    return re1.sub(' ', html.unescape(x))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Clean every extracted file line by line and write the result into the training directory.\n",
"cleaned_all = []\n",
"for ext_file in ext_files:\n",
"    raw_txt = !cat {EXT}{ext_file}\n",
"    cleaned_doc = []\n",
"    for line in raw_txt:\n",
"        # remove tags\n",
"        new_line = re.sub('<[^<]+?>', '', line)\n",
"        # drop double-underscore-delimited spans (presumably wiki magic words such as __NOTOC__ - confirm)\n",
"        new_line = re.sub('__[^<]+?__', '', new_line)\n",
"        new_line = fixup(new_line)\n",
"        new_line = new_line.strip()\n",
"        # keep only lines that still have content after cleaning\n",
"        if new_line != '':\n",
"            cleaned_doc.append(new_line)\n",
"    new_doc = '\\n'.join(cleaned_doc)\n",
"    # Write UTF-8 explicitly: the text is Telugu and the locale default encoding may not be able to encode it.\n",
"    with open(f'{TRN}{ext_file}.txt', 'w', encoding='utf-8') as text_file:\n",
"        text_file.write(new_doc)\n",
"    cleaned_all.append(cleaned_doc)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"trn_files = !ls {TRN}\n",
"val_files = !ls {VAL}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create validation set"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"# Call random.seed; assigning to it would replace the function and leave the generator unseeded,\n",
"# so the shuffle that picks the validation files would differ on every run.\n",
"random.seed(42)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"random.shuffle(trn_files)\n",
"val_files = trn_files[:10] # about 20%\n",
"trn_files = trn_files[10:]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import shutil, os\n",
"\n",
"# Make sure the destination directory exists so shutil.move places the files inside it.\n",
"os.makedirs(VAL, exist_ok=True)\n",
"# Set membership keeps the per-file check cheap.\n",
"val_names = set(val_files)\n",
"# Move every .txt file under TRN whose name was selected for validation into VAL.\n",
"for root, dirs, files in os.walk(TRN):\n",
"    for file in files:\n",
"        if file.endswith('.txt') and file in val_names:\n",
"            shutil.move(os.path.join(root, file), VAL)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"trn_files = !ls {TRN}\n",
"val_files = !ls {VAL}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data generator"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Telugu tokenizer"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"text = \"అందువల్ల ఇక్కడ జరుగుతున్నది అతన్ని హేళన చెయ్యటం. అతన్ని “ధీవిశాలు”డని పిలవటం కూడ దాన్లో భాగమే. పైకి పొగడ్తగా కనిపిస్తూ లోపల అవహేళన నిండిన ఈ పద్యం ఆలోచనామృతం!\"\n",
"a = tokenize(text)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['అందువల్ల',\n",
" 'ఇక్కడ',\n",
" 'జరుగుతున్నది',\n",
" 'అతన్ని',\n",
" 'హేళన',\n",
" 'చెయ్యటం',\n",
" '.',\n",
" 'అతన్ని',\n",
" '“',\n",
" 'ధీవిశాలు',\n",
" '”',\n",
" 'డని',\n",
" 'పిలవటం',\n",
" 'కూడ',\n",
" 'దాన్లో',\n",
" 'భాగమే',\n",
" '.',\n",
" 'పైకి',\n",
" 'పొగడ్తగా',\n",
" 'కనిపిస్తూ',\n",
" 'లోపల',\n",
" 'అవహేళన',\n",
" 'నిండిన',\n",
" 'ఈ',\n",
" 'పద్యం',\n",
" 'ఆలోచనామృతం',\n",
" '!']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There is a lot more cleaning to do in tokenization: punctuation, Roman numerals and currency notation have to be accounted for first. And maybe morphological analysis, too - we have to deal with agglutination!"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# TEXT = data.Field(lower=True, tokenize=tokenize)\n",
"# Load the previously pickled field; the context manager closes the file handle once loading is done.\n",
"with open(f'{PATH}models/TEXT.pkl', 'rb') as text_file:\n",
"    TEXT = pickle.load(text_file)\n",
"bs = 32  # batch size passed to LanguageModelData\n",
"bptt = 70  # bptt value passed to LanguageModelData"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>.</th>\n",
" <td>2428201</td>\n",
" </tr>\n",
" <tr>\n",
" <th>,</th>\n",
" <td>1284024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>కి</th>\n",
" <td>373552</td>\n",
" </tr>\n",
" <tr>\n",
" <th>నుండి</th>\n",
" <td>361665</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ఉన్నాయి</th>\n",
" <td>306000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>మీ</th>\n",
" <td>287805</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ఉంది</th>\n",
" <td>275448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>గ్రామం</th>\n",
" <td>258004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>దూరంలో</th>\n",
" <td>249881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>222843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>\"</th>\n",
" <td>190407</td>\n",
" </tr>\n",
" <tr>\n",
" <th>గ్రామంలో</th>\n",
" <td>183030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>)</th>\n",
" <td>151912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>(</th>\n",
" <td>149897</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>148945</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
". 2428201\n",
", 1284024\n",
"కి 373552\n",
"నుండి 361665\n",
"ఉన్నాయి 306000\n",
"మీ 287805\n",
"ఉంది 275448\n",
"గ్రామం 258004\n",
"దూరంలో 249881\n",
"10 222843\n",
"\" 190407\n",
"గ్రామంలో 183030\n",
") 151912\n",
"( 149897\n",
"5 148945"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"freqs = pd.DataFrame.from_dict(TEXT.vocab.freqs, orient='index')\n",
"freqs.sort_values(0, ascending=False).head(15)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# cnt[k-1] = number of vocabulary entries whose frequency is at least k, for k = 1..49\n",
"cnt = [freqs[freqs[0] >= min_count].shape[0] for min_count in range(1, 50)]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.lines.Line2D at 0x7fae77636208>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.plot(cnt)\n",
"plt.axvline(x=10, color='red', linestyle='--')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>శిలాశాసనాల</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>నేపధ్యంలో</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>స్ధానం</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ranaut</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>మన్హట్టాన్</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>తుంగభద్రకు</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>సందర్భాలలోను</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>shares</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>చిహ్నాలైన</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>కోయిలకొండలోను</th>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0\n",
"శిలాశాసనాల 10\n",
"నేపధ్యంలో 10\n",
"స్ధానం 10\n",
"ranaut 10\n",
"మన్హట్టాన్ 10\n",
"తుంగభద్రకు 10\n",
"సందర్భాలలోను 10\n",
"shares 10\n",
"చిహ్నాలైన 10\n",
"కోయిలకొండలోను 10"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# inspect the tokens at the frequency cutoff (count <= 10) to judge how many are legitimate Telugu words\n",
"freqs[freqs[0] <= 10].sort_values(ascending=False, by=0).head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looks like we also have a lot of English words, which is not surprising."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Language model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Ignore that `test` points to the same directory as `train`; we won't be using the test set below.\n",
"FILES = dict(train=f'{TRN_PATH}', validation=f'{VAL_PATH}', test=f'{TRN_PATH}')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pickle.dump(TEXT, open(f'{PATH}models/TEXT.pkl','wb+'))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Variable containing:\n",
" 0\n",
" 84694\n",
" 35129\n",
" 84694\n",
" 15\n",
" 1246\n",
" 11\n",
" 3\n",
" 5485\n",
" 21\n",
"[torch.cuda.LongTensor of size 10x1 (GPU 0)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#trn_ds is list; one for each txt file\n",
"txt = md.trn_ds[0].text[:10]\n",
"TEXT.numericalize([txt]) # change to CPU/ GPU depending on hardware"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"em_sz = 300 # size of each embedding vector\n",
"nh = 500 # number of hidden activations per layer\n",
"nl = 3 # number of layers\n",
"wd = 1e-7 # weight decay"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"opt_fn = partial(optim.Adam, betas=(0.8, 0.99))\n",
"drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15]) * 0.7"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"learner= md.get_model(opt_fn, em_sz, nh, nl, dropouti=drops[0], dropout=drops[1], \n",
" wdrop=drops[2], dropoute=drops[3], dropouth=drops[4])\n",
"learner.reg_fn = partial(seq2seq_reg, alpha=2, beta=1)\n",
"learner.clip = 0.3"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39bd9fcb9a1a4fc6839c79df1a2a284f",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 78%|███████▊ | 8218/10518 [46:56<13:08, 2.92it/s, loss=15.1] "
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#find suitable learning rates\n",
"learner.lr_find(1e-07, 1e2)\n",
"learner.sched.plot()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"lr = 7 * 1e-4"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# learner.metrics = [accuracy]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "284a1da6aec64ed086559a68a654cb53",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss \n",
" 0 3.704231 3.870589 \n",
" 1 3.460246 3.67693 \n",
" 2 3.443637 3.621497 \n",
" 3 3.333726 3.584137 \n",
" 4 3.291139 3.527215 \n",
" 5 3.270766 3.492533 \n",
" 6 3.254767 3.482341 \n",
" 7 3.255783 3.508195 \n",
" 8 3.243309 3.483084 \n",
" 9 3.260235 3.45983 \n",
" 10 3.209985 3.443384 \n",
" 11 3.192712 3.427085 \n",
" 12 3.191212 3.414559 \n",
" 13 3.175301 3.407779 \n",
" 78%|███████▊ | 8182/10448 [46:56<12:59, 2.91it/s, loss=3.32]"
]
}
],
"source": [
"learner.fit(lr, 4, wds=wd, cycle_len=1, cycle_mult=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learner.save_encoder('adam1_enc')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"learner.load_encoder('adam1_enc')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss \n",
" 0 3.178207 3.409042 \n",
" 1 3.213273 3.501319 \n",
" 2 3.226869 3.497255 \n",
" 3 3.216037 3.48765 \n",
" 4 3.178982 3.477347 \n",
" 5 3.181992 3.469156 \n",
" \r"
]
}
],
"source": [
"# learner.fit(lr, 1, wds=1e-6, cycle_len=20, cycle_save_name='adam3_20')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# learner.save_encoder('adam3_enc')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"m = learner.model\n",
"# Use a context manager so the pickle file is flushed and closed as soon as the dump finishes.\n",
"with open(f'{PATH}models/wiki_lang.pkl', 'wb') as model_file:\n",
"    pickle.dump(m, model_file)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SequentialRNN(\n",
" (0): RNN_Encoder(\n",
" (encoder): Embedding(85686, 300, padding_idx=1)\n",
" (encoder_with_dropout): EmbeddingDropout(\n",
" (embed): Embedding(85686, 300, padding_idx=1)\n",
" )\n",
" (rnns): ModuleList(\n",
" (0): WeightDrop(\n",
" (module): LSTM(300, 500, dropout=0.105)\n",
" )\n",
" (1): WeightDrop(\n",
" (module): LSTM(500, 500, dropout=0.105)\n",
" )\n",
" (2): WeightDrop(\n",
" (module): LSTM(500, 300, dropout=0.105)\n",
" )\n",
" )\n",
" (dropouti): LockedDropout(\n",
" )\n",
" (dropouths): ModuleList(\n",
" (0): LockedDropout(\n",
" )\n",
" (1): LockedDropout(\n",
" )\n",
" (2): LockedDropout(\n",
" )\n",
" )\n",
" )\n",
" (1): LinearDecoder(\n",
" (decoder): Linear(in_features=300, out_features=85686, bias=False)\n",
" (dropout): LockedDropout(\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# TEXT = pickle.load(open(f'{PATH}models/TEXT.pkl','rb'))\n",
"# m = pickle.load(open(f'{PATH}models/wiki_lang.pkl','rb'))\n",
"m[0].bs=1\n",
"m.eval()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def gen_text(ss,topk):\n",
"    \"\"\"Return the topk highest-scoring next tokens for the text ss.\n",
"\n",
"    The text is tokenized and numericalized with the global TEXT field, the\n",
"    global model m is reset and run on it, and the ids with the largest\n",
"    scores at the last position are mapped back to strings via TEXT.vocab.itos.\n",
"    \"\"\"\n",
"    token_batch = [tokenize(ss)]\n",
"    numeric_input = TEXT.numericalize(token_batch)\n",
"    m.reset()\n",
"    scores, *_ = m(numeric_input)\n",
"    last_step_scores = scores[-1]\n",
"    top_ids = torch.topk(last_step_scores, topk)[1]\n",
"    return [TEXT.vocab.itos[idx] for idx in to_np(top_ids)]\n",
"\n",
"def gen_sentences(ss,nb_words):\n",
"    \"\"\"Greedily generate nb_words tokens that continue the seed text ss.\n",
"\n",
"    The seed is tokenized, numericalized with the global TEXT field and run\n",
"    through the global model m after m.reset(). At every step the two\n",
"    highest-scoring ids for the last position are taken; the best one is used\n",
"    unless its id is below 2, in which case the runner-up is used. The chosen\n",
"    id is then fed back into m to obtain the next prediction.\n",
"\n",
"    Returns the list of generated token strings looked up in TEXT.vocab.itos.\n",
"    \"\"\"\n",
"    result = []\n",
"    s = [tokenize(ss)]\n",
"    t = TEXT.numericalize(s)\n",
"    m.reset()\n",
"    pred,*_ = m(t)\n",
"    for i in range(nb_words):\n",
"        pred_i = pred[-1].topk(2)[1]\n",
"        # ids 0 and 1 appear to be the <unk>/<pad> special tokens - TODO confirm against TEXT.vocab.itos\n",
"        pred_i = pred_i[1] if pred_i.data[0] < 2 else pred_i[0]\n",
"        result.append(TEXT.vocab.itos[pred_i.data[0]])\n",
"        # NOTE(review): only the chosen token is fed back, so this presumably relies on m keeping its hidden state between calls\n",
"        pred,*_ = m(pred_i[0].unsqueeze(0))\n",
"    return(result)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['ఆయన', '<unk>', 'ఈ', 'ఆ', 'ఆమె', '\"', 'తరువాత', 'అయితే', 'తన', 'ఆయనకు']"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_sentence = \"ఆయన కుటుంబం ఆయనకు వారి స్తోమత ప్రకారం వైద్యాన్ని అందించింది.\"\n",
"gen_text(test_sentence, 10)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ఆయనతండ్రిపేరు\"శ్రీకృష్ణ\".ఆయనతండ్రిపేరు\"శ్రీకృష్ణ\".ఆయనతండ్రిపేరు\"శ్రీకృష్ణ\".ఆయనతండ్రిపేరు\"శ్రీకృష్ణ\".ఆయనతండ్రిపేరు\"శ్రీకృష్ణ\".ఆయనతండ్రిపేరు\"శ్రీకృష్ణ\".ఆయనతండ్రి'"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"' '.join(gen_sentences(test_sentence, 50))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Embeddings"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# First named parameter of the model (presumably the encoder embedding matrix - confirm against the model repr).\n",
"emb_weights = list(learner.model.named_parameters())[0][1]\n",
"emb_np = to_np(emb_weights.data)\n",
"\n",
"# Reload the pickled field, attach the trained weights as its vocabulary vectors, and save it under a new name.\n",
"# Context managers make sure each file is closed (and the dump flushed) before the file is read again.\n",
"with open(f'{PATH}models/TEXT.pkl', 'rb') as text_file:\n",
"    TEXT = pickle.load(text_file)\n",
"TEXT.vocab.set_vectors(vectors=emb_weights.data, dim=300, stoi=TEXT.vocab.stoi)\n",
"with open(f'{PATH}models/TEXT_vec.pkl', 'wb') as vec_file:\n",
"    pickle.dump(TEXT, vec_file)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Load the field with attached vectors; the context manager closes the file handle after loading.\n",
"with open(f'{PATH}models/TEXT_vec.pkl', 'rb') as vec_file:\n",
"    TEXT_vec = pickle.load(vec_file)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>290</th>\n",
" <th>291</th>\n",
" <th>292</th>\n",
" <th>293</th>\n",
" <th>294</th>\n",
" <th>295</th>\n",
" <th>296</th>\n",
" <th>297</th>\n",
" <th>298</th>\n",
" <th>299</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>&lt;unk&gt;</th>\n",
" <td>0.162756</td>\n",
" <td>-0.118457</td>\n",
" <td>0.066381</td>\n",
" <td>-0.056356</td>\n",
" <td>-0.202901</td>\n",
" <td>0.066646</td>\n",
" <td>0.113037</td>\n",
" <td>0.568882</td>\n",
" <td>-0.354657</td>\n",
" <td>0.329644</td>\n",
" <td>...</td>\n",
" <td>-0.195009</td>\n",
" <td>0.371669</td>\n",
" <td>0.091774</td>\n",
" <td>0.007729</td>\n",
" <td>0.158614</td>\n",
" <td>0.325467</td>\n",
" <td>-0.185765</td>\n",
" <td>0.930058</td>\n",
" <td>-0.041299</td>\n",
" <td>-0.026871</td>\n",
" </tr>\n",
" <tr>\n",
" <th>&lt;pad&gt;</th>\n",
" <td>-0.479526</td>\n",
" <td>0.014009</td>\n",
" <td>-0.450860</td>\n",
" <td>-0.361312</td>\n",
" <td>0.154549</td>\n",
" <td>0.071390</td>\n",
" <td>-0.295752</td>\n",
" <td>-0.418422</td>\n",
" <td>0.770350</td>\n",
" <td>0.555413</td>\n",
" <td>...</td>\n",
" <td>0.291602</td>\n",
" <td>-0.546950</td>\n",
" <td>-0.103712</td>\n",
" <td>-0.113335</td>\n",
" <td>-0.270057</td>\n",
" <td>-0.270375</td>\n",
" <td>0.351973</td>\n",
" <td>-0.412007</td>\n",
" <td>0.125875</td>\n",
" <td>-0.121921</td>\n",
" </tr>\n",
" <tr>\n",
" <th>.</th>\n",
" <td>-0.126146</td>\n",
" <td>-0.098541</td>\n",
" <td>-0.013123</td>\n",
" <td>0.106648</td>\n",
" <td>0.528485</td>\n",
" <td>-0.003247</td>\n",
" <td>-0.646804</td>\n",
" <td>0.522134</td>\n",
" <td>0.108019</td>\n",
" <td>0.543941</td>\n",
" <td>...</td>\n",
" <td>0.040240</td>\n",
" <td>0.321279</td>\n",
" <td>0.051702</td>\n",
" <td>-0.037312</td>\n",
" <td>-0.288039</td>\n",
" <td>0.988383</td>\n",
" <td>-0.087501</td>\n",
" <td>0.600596</td>\n",
" <td>-0.159826</td>\n",
" <td>0.282194</td>\n",
" </tr>\n",
" <tr>\n",
" <th>,</th>\n",
" <td>0.043711</td>\n",
" <td>-0.091255</td>\n",
" <td>0.022957</td>\n",
" <td>0.211157</td>\n",
" <td>0.173500</td>\n",
" <td>0.058316</td>\n",
" <td>1.159627</td>\n",
" <td>0.605265</td>\n",
" <td>0.015381</td>\n",
" <td>0.610853</td>\n",
" <td>...</td>\n",
" <td>-0.099935</td>\n",
" <td>0.480227</td>\n",
" <td>0.057349</td>\n",
" <td>-0.036091</td>\n",
" <td>0.111147</td>\n",
" <td>0.623288</td>\n",
" <td>-0.184326</td>\n",
" <td>1.418015</td>\n",
" <td>0.020298</td>\n",
" <td>0.187803</td>\n",
" </tr>\n",
" <tr>\n",
" <th>కి</th>\n",
" <td>0.373087</td>\n",
" <td>0.145757</td>\n",
" <td>-0.918256</td>\n",
" <td>-1.009143</td>\n",
" <td>-1.178270</td>\n",
" <td>-0.274561</td>\n",
" <td>-0.707244</td>\n",
" <td>0.399376</td>\n",
" <td>0.197722</td>\n",
" <td>1.274530</td>\n",
" <td>...</td>\n",
" <td>0.545980</td>\n",
" <td>-0.941536</td>\n",
" <td>-0.132543</td>\n",
" <td>-0.372984</td>\n",
" <td>0.386519</td>\n",
" <td>0.748406</td>\n",
" <td>-0.599072</td>\n",
" <td>-1.956071</td>\n",
" <td>-0.498130</td>\n",
" <td>1.298580</td>\n",
" </tr>\n",
" <tr>\n",
" <th>నుండి</th>\n",
" <td>0.271972</td>\n",
" <td>0.008300</td>\n",
" <td>-0.261310</td>\n",
" <td>-0.271940</td>\n",
" <td>-1.232825</td>\n",
" <td>-0.001914</td>\n",
" <td>-1.091928</td>\n",
" <td>0.509688</td>\n",
" <td>0.286951</td>\n",
" <td>0.536997</td>\n",
" <td>...</td>\n",
" <td>-0.270993</td>\n",
" <td>-0.130284</td>\n",
" <td>-0.021892</td>\n",
" <td>-0.098667</td>\n",
" <td>-0.450791</td>\n",
" <td>0.570602</td>\n",
" <td>0.177364</td>\n",
" <td>-0.171380</td>\n",
" <td>0.128581</td>\n",
" <td>-0.475457</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ఉన్నాయి</th>\n",
" <td>-0.114024</td>\n",
" <td>-0.124042</td>\n",
" <td>-0.311016</td>\n",
" <td>-0.048162</td>\n",
" <td>0.314308</td>\n",
" <td>0.240846</td>\n",
" <td>-4.632638</td>\n",
" <td>0.775169</td>\n",
" <td>0.661511</td>\n",
" <td>0.968530</td>\n",
" <td>...</td>\n",
" <td>-0.004587</td>\n",
" <td>0.103790</td>\n",
" <td>0.014730</td>\n",
" <td>-0.635435</td>\n",
" <td>0.146665</td>\n",
" <td>1.464527</td>\n",
" <td>0.039011</td>\n",
" <td>0.259942</td>\n",
" <td>0.172892</td>\n",
" <td>0.677400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>మీ</th>\n",
" <td>1.038532</td>\n",
" <td>0.625470</td>\n",
" <td>-0.522149</td>\n",
" <td>-0.468230</td>\n",
" <td>-0.561634</td>\n",
" <td>0.374316</td>\n",
" <td>-0.048730</td>\n",
" <td>0.466691</td>\n",
" <td>-0.038677</td>\n",
" <td>1.585641</td>\n",
" <td>...</td>\n",
" <td>0.840410</td>\n",
" <td>2.888513</td>\n",
" <td>0.066506</td>\n",
" <td>0.026903</td>\n",
" <td>0.290671</td>\n",
" <td>0.485242</td>\n",
" <td>0.535367</td>\n",
" <td>-1.048379</td>\n",
" <td>0.104699</td>\n",
" <td>0.856214</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ఉంది</th>\n",
" <td>0.111868</td>\n",
" <td>0.211571</td>\n",
" <td>-0.292743</td>\n",
" <td>-0.233061</td>\n",
" <td>0.190581</td>\n",
" <td>0.227745</td>\n",
" <td>-1.724061</td>\n",
" <td>0.750188</td>\n",
" <td>0.477490</td>\n",
" <td>1.293393</td>\n",
" <td>...</td>\n",
" <td>-0.310083</td>\n",
" <td>0.125917</td>\n",
" <td>-0.133561</td>\n",
" <td>-0.162669</td>\n",
" <td>0.177769</td>\n",
" <td>1.329759</td>\n",
" <td>0.129581</td>\n",
" <td>0.850391</td>\n",
" <td>0.244996</td>\n",
" <td>0.585481</td>\n",
" </tr>\n",
" <tr>\n",
" <th>గ్రామం</th>\n",
" <td>0.350491</td>\n",
" <td>-0.038242</td>\n",
" <td>0.184957</td>\n",
" <td>-0.552766</td>\n",
" <td>-1.279211</td>\n",
" <td>0.063894</td>\n",
" <td>1.263454</td>\n",
" <td>0.927973</td>\n",
" <td>0.461793</td>\n",
" <td>1.577360</td>\n",
" <td>...</td>\n",
" <td>0.149890</td>\n",
" <td>0.034798</td>\n",
" <td>-0.373647</td>\n",
" <td>0.095328</td>\n",
" <td>0.144108</td>\n",
" <td>-2.156482</td>\n",
" <td>0.859847</td>\n",
" <td>0.522844</td>\n",
" <td>0.414594</td>\n",
" <td>0.991583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>దూరంలో</th>\n",
" <td>-0.230388</td>\n",
" <td>0.472743</td>\n",
" <td>-0.017128</td>\n",
" <td>-0.983632</td>\n",
" <td>3.105000</td>\n",
" <td>-0.159550</td>\n",
" <td>-0.509644</td>\n",
" <td>0.722185</td>\n",
" <td>-0.487401</td>\n",
" <td>1.580943</td>\n",
" <td>...</td>\n",
" <td>0.426283</td>\n",
" <td>0.063384</td>\n",
" <td>-0.045331</td>\n",
" <td>0.543215</td>\n",
" <td>0.471905</td>\n",
" <td>1.090400</td>\n",
" <td>-0.339869</td>\n",
" <td>-1.046772</td>\n",
" <td>0.323809</td>\n",
" <td>1.054026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.744862</td>\n",
" <td>-0.247973</td>\n",
" <td>-0.146063</td>\n",
" <td>-0.577485</td>\n",
" <td>-0.807728</td>\n",
" <td>-0.236516</td>\n",
" <td>-0.613125</td>\n",
" <td>0.630741</td>\n",
" <td>0.188854</td>\n",
" <td>1.338283</td>\n",
" <td>...</td>\n",
" <td>0.179789</td>\n",
" <td>0.452522</td>\n",
" <td>-0.251397</td>\n",
" <td>-0.129144</td>\n",
" <td>-0.205054</td>\n",
" <td>0.664164</td>\n",
" <td>0.249665</td>\n",
" <td>-0.132851</td>\n",
" <td>-0.025986</td>\n",
" <td>-0.582486</td>\n",
" </tr>\n",
" <tr>\n",
" <th>\"</th>\n",
" <td>-0.049747</td>\n",
" <td>0.314193</td>\n",
" <td>0.200771</td>\n",
" <td>-0.271289</td>\n",
" <td>0.850634</td>\n",
" <td>0.149836</td>\n",
" <td>0.352507</td>\n",
" <td>0.562104</td>\n",
" <td>0.010921</td>\n",
" <td>0.360467</td>\n",
" <td>...</td>\n",
" <td>-0.042071</td>\n",
" <td>0.167196</td>\n",
" <td>0.141774</td>\n",
" <td>-0.362660</td>\n",
" <td>0.660838</td>\n",
" <td>0.777507</td>\n",
" <td>-0.263045</td>\n",
" <td>0.621559</td>\n",
" <td>-0.025154</td>\n",
" <td>0.037878</td>\n",
" </tr>\n",
" <tr>\n",
" <th>గ్రామంలో</th>\n",
" <td>1.024729</td>\n",
" <td>0.615187</td>\n",
" <td>0.286993</td>\n",
" <td>-0.175022</td>\n",
" <td>0.460615</td>\n",
" <td>0.160861</td>\n",
" <td>-0.986948</td>\n",
" <td>1.342301</td>\n",
" <td>-0.253582</td>\n",
" <td>2.256250</td>\n",
" <td>...</td>\n",
" <td>0.418742</td>\n",
" <td>-0.390475</td>\n",
" <td>0.193697</td>\n",
" <td>0.124218</td>\n",
" <td>0.177791</td>\n",
" <td>-0.998894</td>\n",
" <td>0.674249</td>\n",
" <td>1.680783</td>\n",
" <td>0.267778</td>\n",
" <td>0.704820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>)</th>\n",
" <td>0.026697</td>\n",
" <td>0.348036</td>\n",
" <td>-0.162904</td>\n",
" <td>0.012066</td>\n",
" <td>-0.294381</td>\n",
" <td>0.195183</td>\n",
" <td>0.641476</td>\n",
" <td>0.902897</td>\n",
" <td>0.153347</td>\n",
" <td>0.945835</td>\n",
" <td>...</td>\n",
" <td>-0.213940</td>\n",
" <td>0.043878</td>\n",
" <td>0.072876</td>\n",
" <td>0.701433</td>\n",
" <td>0.510473</td>\n",
" <td>1.081460</td>\n",
" <td>-0.019017</td>\n",
" <td>0.141006</td>\n",
" <td>0.305660</td>\n",
" <td>-0.029373</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15 rows × 300 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"<unk> 0.162756 -0.118457 0.066381 -0.056356 -0.202901 0.066646 \n",
"<pad> -0.479526 0.014009 -0.450860 -0.361312 0.154549 0.071390 \n",
". -0.126146 -0.098541 -0.013123 0.106648 0.528485 -0.003247 \n",
", 0.043711 -0.091255 0.022957 0.211157 0.173500 0.058316 \n",
"కి 0.373087 0.145757 -0.918256 -1.009143 -1.178270 -0.274561 \n",
"నుండి 0.271972 0.008300 -0.261310 -0.271940 -1.232825 -0.001914 \n",
"ఉన్నాయి -0.114024 -0.124042 -0.311016 -0.048162 0.314308 0.240846 \n",
"మీ 1.038532 0.625470 -0.522149 -0.468230 -0.561634 0.374316 \n",
"ఉంది 0.111868 0.211571 -0.292743 -0.233061 0.190581 0.227745 \n",
"గ్రామం 0.350491 -0.038242 0.184957 -0.552766 -1.279211 0.063894 \n",
"దూరంలో -0.230388 0.472743 -0.017128 -0.983632 3.105000 -0.159550 \n",
"10 0.744862 -0.247973 -0.146063 -0.577485 -0.807728 -0.236516 \n",
"\" -0.049747 0.314193 0.200771 -0.271289 0.850634 0.149836 \n",
"గ్రామంలో 1.024729 0.615187 0.286993 -0.175022 0.460615 0.160861 \n",
") 0.026697 0.348036 -0.162904 0.012066 -0.294381 0.195183 \n",
"\n",
" 6 7 8 9 ... 290 \\\n",
"<unk> 0.113037 0.568882 -0.354657 0.329644 ... -0.195009 \n",
"<pad> -0.295752 -0.418422 0.770350 0.555413 ... 0.291602 \n",
". -0.646804 0.522134 0.108019 0.543941 ... 0.040240 \n",
", 1.159627 0.605265 0.015381 0.610853 ... -0.099935 \n",
"కి -0.707244 0.399376 0.197722 1.274530 ... 0.545980 \n",
"నుండి -1.091928 0.509688 0.286951 0.536997 ... -0.270993 \n",
"ఉన్నాయి -4.632638 0.775169 0.661511 0.968530 ... -0.004587 \n",
"మీ -0.048730 0.466691 -0.038677 1.585641 ... 0.840410 \n",
"ఉంది -1.724061 0.750188 0.477490 1.293393 ... -0.310083 \n",
"గ్రామం 1.263454 0.927973 0.461793 1.577360 ... 0.149890 \n",
"దూరంలో -0.509644 0.722185 -0.487401 1.580943 ... 0.426283 \n",
"10 -0.613125 0.630741 0.188854 1.338283 ... 0.179789 \n",
"\" 0.352507 0.562104 0.010921 0.360467 ... -0.042071 \n",
"గ్రామంలో -0.986948 1.342301 -0.253582 2.256250 ... 0.418742 \n",
") 0.641476 0.902897 0.153347 0.945835 ... -0.213940 \n",
"\n",
" 291 292 293 294 295 296 \\\n",
"<unk> 0.371669 0.091774 0.007729 0.158614 0.325467 -0.185765 \n",
"<pad> -0.546950 -0.103712 -0.113335 -0.270057 -0.270375 0.351973 \n",
". 0.321279 0.051702 -0.037312 -0.288039 0.988383 -0.087501 \n",
", 0.480227 0.057349 -0.036091 0.111147 0.623288 -0.184326 \n",
"కి -0.941536 -0.132543 -0.372984 0.386519 0.748406 -0.599072 \n",
"నుండి -0.130284 -0.021892 -0.098667 -0.450791 0.570602 0.177364 \n",
"ఉన్నాయి 0.103790 0.014730 -0.635435 0.146665 1.464527 0.039011 \n",
"మీ 2.888513 0.066506 0.026903 0.290671 0.485242 0.535367 \n",
"ఉంది 0.125917 -0.133561 -0.162669 0.177769 1.329759 0.129581 \n",
"గ్రామం 0.034798 -0.373647 0.095328 0.144108 -2.156482 0.859847 \n",
"దూరంలో 0.063384 -0.045331 0.543215 0.471905 1.090400 -0.339869 \n",
"10 0.452522 -0.251397 -0.129144 -0.205054 0.664164 0.249665 \n",
"\" 0.167196 0.141774 -0.362660 0.660838 0.777507 -0.263045 \n",
"గ్రామంలో -0.390475 0.193697 0.124218 0.177791 -0.998894 0.674249 \n",
") 0.043878 0.072876 0.701433 0.510473 1.081460 -0.019017 \n",
"\n",
" 297 298 299 \n",
"<unk> 0.930058 -0.041299 -0.026871 \n",
"<pad> -0.412007 0.125875 -0.121921 \n",
". 0.600596 -0.159826 0.282194 \n",
", 1.418015 0.020298 0.187803 \n",
"కి -1.956071 -0.498130 1.298580 \n",
"నుండి -0.171380 0.128581 -0.475457 \n",
"ఉన్నాయి 0.259942 0.172892 0.677400 \n",
"మీ -1.048379 0.104699 0.856214 \n",
"ఉంది 0.850391 0.244996 0.585481 \n",
"గ్రామం 0.522844 0.414594 0.991583 \n",
"దూరంలో -1.046772 0.323809 1.054026 \n",
"10 -0.132851 -0.025986 -0.582486 \n",
"\" 0.621559 -0.025154 0.037878 \n",
"గ్రామంలో 1.680783 0.267778 0.704820 \n",
") 0.141006 0.305660 -0.029373 \n",
"\n",
"[15 rows x 300 columns]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"telugu2vec = pd.DataFrame(to_np(TEXT_vec.vocab.vectors))\n",
"telugu2vec.index = TEXT_vec.vocab.itos\n",
"telugu2vec.head(15)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the tokens whose text has no space character in it.\n",
"contains_space = telugu2vec.index.str.contains(' ')\n",
"telugu2save = telugu2vec[~contains_space]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Trim both ends of the space-free vocabulary before saving:\n",
"#   * the first 4 rows are <unk>, <pad>, '.' and ',' (see the head(15) output above);\n",
"#     start the slice at 0 instead if the unknown/padding vectors are needed later.\n",
"#   * the last 3 rows are presumably tokens with weird characters caused by bad\n",
"#     segmentation -- NOTE(review): confirm by inspecting telugu2vec.tail().\n",
"# This is only a rough clean-up; many badly segmented tokens most likely remain.\n",
"#\n",
"# The slice is rebuilt from telugu2vec rather than from telugu2save itself, so\n",
"# re-running this cell gives the same result instead of dropping 7 more rows each time.\n",
"telugu2save = telugu2vec[~telugu2vec.index.str.contains(' ')].iloc[4:-3, :]"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(85679, 300)"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"telugu2save.shape"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"import csv\n",
"\n",
"# Write one space-separated line per token: the token followed by its 300 vector\n",
"# components, with no header row.\n",
"# QUOTE_NONE writes every token verbatim. With the default quoting the double-quote\n",
"# token (present in this vocabulary) is wrapped in extra quote characters, and\n",
"# whitespace-splitting .vec readers then load it as a different token.\n",
"# A token that contains a newline now raises a csv error instead of silently\n",
"# breaking the one-token-per-line layout.\n",
"telugu2save.to_csv(\n",
"    f'{PATH}models/telugu2vec.vec',\n",
"    sep=' ',\n",
"    header=False,\n",
"    line_terminator='\\n',\n",
"    quoting=csv.QUOTE_NONE,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# Plain Python list of the tokens kept in telugu2save, in row order.\n",
"word_list = telugu2save.index.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# Save the vocabulary as plain text, one token per line (no trailing newline).\n",
"# The encoding is pinned to UTF-8 because the tokens are Telugu text: relying on the\n",
"# platform default encoding can raise UnicodeEncodeError or produce an unreadable file.\n",
"# Mode 'w' is enough here -- the file is only written, never read back.\n",
"with open('word_list.txt', 'w', encoding='utf-8') as word_file:\n",
"    word_file.write('\\n'.join(word_list))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30.236264039141496"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Perplexity approximation: exp() of a loss value.\n",
"# NOTE(review): 3.409042 is presumably the language-model loss reported by the\n",
"# training run earlier in the notebook -- confirm which loss it is.\n",
"lm_loss = 3.409042\n",
"math.exp(lm_loss)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/0dd0df21cf404cf2bb51d0148c8b7d8b"
},
"gist": {
"data": {
"description": "fastai.text imdb example",
"public": true
},
"id": "0dd0df21cf404cf2bb51d0148c8b7d8b"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "86px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment