Skip to content

Instantly share code, notes, and snippets.

@bharadwaj6
Created May 25, 2018 21:27
Show Gist options
  • Save bharadwaj6/8457ff541731f913f2d26eedfef4d803 to your computer and use it in GitHub Desktop.
Save bharadwaj6/8457ff541731f913f2d26eedfef4d803 to your computer and use it in GitHub Desktop.
Telugu_Language_Model
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pathlib\n",
"\n",
"from fastai.text import *\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Preparation"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"BOS = 'xbos' # beginning-of-sentence tag\n",
"FLD = 'xfld' # data field tag\n",
"\n",
"PATH = pathlib.Path(\"data/teluguwiki/data\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Output directory for the corpus CSVs, token caches and vocabulary files.\n",
"LM_PATH = pathlib.Path('data/telugu_lm/')\n",
"# parents=True: also create a missing `data/` directory instead of raising FileNotFoundError\n",
"LM_PATH.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"471\n"
]
},
{
"data": {
"text/plain": [
"['data/teluguwiki/data/AC/wiki_97',\n",
" 'data/teluguwiki/data/AC/wiki_00',\n",
" 'data/teluguwiki/data/AC/wiki_69',\n",
" 'data/teluguwiki/data/AC/wiki_14',\n",
" 'data/teluguwiki/data/AC/wiki_20']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LANG_FILENAMES = [str(f) for f in PATH.rglob(\"*/*\")]\n",
"print(len(LANG_FILENAMES))\n",
"LANG_FILENAMES[0:5]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Every extracted wiki file is read line by line, each line being parsed as one JSON record.\n",
"records = []\n",
"for fname in LANG_FILENAMES:\n",
"    # `with` closes each input file as soon as it has been read instead of leaking the handle\n",
"    with open(fname, encoding='utf-8') as fh:\n",
"        records.extend(json.loads(line) for line in fh)\n",
"\n",
"LANG_TEXT = pd.DataFrame(records)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"LANG_TEXT.to_csv(f\"{LM_PATH}/Wiki_Telugu_Corpus.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"LANG_TEXT = pd.read_csv(f\"{LM_PATH}/Wiki_Telugu_Corpus.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"(LANG_TEXT.assign(labels = 0)\n",
" .pipe(lambda x: x[['labels', 'text']])\n",
" .to_csv(f\"{LM_PATH}/Wiki_Telugu_Corpus2.csv\", header=None, index=False))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Some statistics of Telugu Wikipedia"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Getting rid of the title name in the text field\n",
"def split_title_from_text(text):\n",
"    \"\"\"Drop the title (everything before the first blank line) from an article.\n",
"\n",
"    Paragraph breaks in the remaining body are kept, so the last word of one\n",
"    paragraph is not fused with the first word of the next. Text without a\n",
"    blank line is returned unchanged; non-string values (e.g. the NaN pandas\n",
"    uses for an empty CSV field) yield ''.\n",
"    \"\"\"\n",
"    if not isinstance(text, str):\n",
"        return ''\n",
"    # partition splits on the first separator only, leaving later paragraph breaks intact\n",
"    _title, sep, body = text.partition(\"\\n\\n\")\n",
"    return body if sep else text\n",
"\n",
"LANG_TEXT['text'] = LANG_TEXT['text'].apply(split_title_from_text)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Number of documents"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(69001, 4)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LANG_TEXT.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Number of words in all the documents"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"22174830"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"LANG_TEXT['text'].apply(lambda x: len(x.split(\" \"))).sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Number of unique tokens across documents"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2023529"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collect the vocabulary one document at a time: gluing all documents together with ''\n",
"# fuses the last word of each document with the first word of the next one and\n",
"# materialises the whole corpus as a single string.\n",
"# NOTE: this cell's recorded output predates that fix; expect a slightly different count on re-run.\n",
"vocab = set()\n",
"for doc in LANG_TEXT['text']:\n",
"    vocab.update(doc.split(\" \"))\n",
"len(vocab)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def get_texts(df, n_lbls=1):\n",
"    \"\"\"Tokenize one DataFrame chunk whose first `n_lbls` columns are labels.\n",
"\n",
"    Columns are addressed by integer label (as produced by read_csv(header=None)).\n",
"    Every column after the labels is a text field; the fields of a row are joined\n",
"    into one string, each prefixed with the FLD marker and its 1-based position,\n",
"    and the row starts with a newline plus the BOS marker.\n",
"\n",
"    Returns (tokens, labels): the tokenizer output for all rows and a list holding\n",
"    one int64 label row per input row.\n",
"    \"\"\"\n",
"    label_arr = df.iloc[:, range(n_lbls)].values.astype(np.int64)\n",
"\n",
"    # first text field, then any further fields appended with their own field marker\n",
"    texts = f'\\n{BOS} {FLD} 1 ' + df[n_lbls].astype(str)\n",
"    for col in range(n_lbls + 1, len(df.columns)):\n",
"        texts += f' {FLD} {col - n_lbls} ' + df[col].astype(str)\n",
"    # NOTE: the `fixup` clean-up pass is left disabled here\n",
"\n",
"    # one sub-list per CPU core, tokenized in parallel\n",
"    # (case handling is left to the tokenizer, per the original author's note)\n",
"    per_core = partition_by_cores(texts)\n",
"    tokens = Tokenizer().proc_all_mp(per_core)\n",
"    return tokens, list(label_arr)\n",
"\n",
"def get_all(df, n_lbls):\n",
"    \"\"\"Run get_texts over every chunk yielded by `df` and concatenate the results.\n",
"\n",
"    `df` is iterated, so it is expected to yield DataFrame chunks (e.g. a chunked\n",
"    CSV reader). The chunk number is printed as a progress indicator.\n",
"    Returns (tokens, labels) accumulated over all chunks.\n",
"    \"\"\"\n",
"    all_tok, all_labels = [], []\n",
"    for chunk_no, chunk in enumerate(df):\n",
"        print(chunk_no)\n",
"        chunk_tok, chunk_labels = get_texts(chunk, n_lbls)\n",
"        all_tok.extend(chunk_tok)\n",
"        all_labels.extend(chunk_labels)\n",
"    return all_tok, all_labels"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"LANG_TEXT = pd.read_csv(f\"{LM_PATH}/Wiki_Telugu_Corpus2.csv\", header=None)#, chunksize=5000)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 10% of the documents for validation.\n",
"# random_state pins the split so a kernel restart reproduces the same train/validation sets.\n",
"trn_texts, val_texts = sklearn.model_selection.train_test_split(\n",
"    LANG_TEXT, test_size=0.1, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(42)  # fixed seed so the two orderings drawn next are repeatable\n",
"\n",
"# random row orderings: train is drawn first, validation second\n",
"trn_idx = np.random.permutation(len(trn_texts))\n",
"val_idx = np.random.permutation(len(val_texts))\n",
"\n",
"# reorder the rows of each split\n",
"df_trn = trn_texts.iloc[trn_idx, :]\n",
"df_val = val_texts.iloc[val_idx, :]\n",
"\n",
"# name the columns, then write each split as a header-less, index-less CSV\n",
"# (note: the validation split is stored as test.csv)\n",
"for frame, fname in ((df_trn, 'train.csv'), (df_val, 'test.csv')):\n",
"    frame.columns = ['labels', 'text']\n",
"    frame.to_csv(LM_PATH/fname, header=False, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"chunksize = 10000\n",
"df_trn = pd.read_csv(LM_PATH/'train.csv', header=None, chunksize=chunksize)\n",
"df_val = pd.read_csv(LM_PATH/'test.csv', header=None, chunksize=chunksize)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n",
"1\n",
"2\n",
"3\n",
"4\n",
"5\n",
"0\n"
]
}
],
"source": [
"tok_trn, trn_labels = get_all(df_trn, 1)\n",
"tok_val, val_labels = get_all(df_val, 1)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# create a tmp directory to store the upcoming numpy arrays\n",
"(LM_PATH/'tmp').mkdir(exist_ok=True)\n",
"\n",
"# save the train and validation tokens in the tmp directory\n",
"# Documents have different token counts, so they are stored as an explicit object array;\n",
"# recent NumPy releases refuse to build such a ragged array implicitly.\n",
"np.save(LM_PATH/'tmp'/'tok_trn.npy', np.array(tok_trn, dtype=object))\n",
"np.save(LM_PATH/'tmp'/'tok_val.npy', np.array(tok_val, dtype=object))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# The token caches are expected to be object arrays (one token list per document), which\n",
"# NumPy stores via pickle; since NumPy 1.16.3 np.load only reads those with allow_pickle=True.\n",
"# Only load caches written by this notebook: unpickling untrusted files can run arbitrary code.\n",
"tok_trn = np.load(LM_PATH/'tmp'/'tok_trn.npy', allow_pickle=True)\n",
"tok_val = np.load(LM_PATH/'tmp'/'tok_val.npy', allow_pickle=True)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(',', 1182001),\n",
" ('\\n', 563593),\n",
" ('\\n\\n', 365503),\n",
" ('నుండి', 315059),\n",
" ('ఉన్నాయి.', 257793),\n",
" ('దూరంలో', 213086),\n",
" ('గ్రామం', 213014),\n",
" ('ఉంది.', 212279),\n",
" ('10', 185585),\n",
" ('గ్రామంలో', 160000),\n",
" ('\"', 153647),\n",
" ('ఈ', 143467),\n",
" ('మరియు', 141523),\n",
" ('కి.మీ.', 130526),\n",
" ('(', 130198),\n",
" (')', 127110),\n",
" ('5', 123026),\n",
" ('కేంద్రం', 115188),\n",
" ('సమీప', 112802),\n",
" ('.', 101798),\n",
" ('ఒక', 88165),\n",
" ('సౌకర్యం', 79118),\n",
" ('ద్వారా', 75279),\n",
" ('కూడా', 74616),\n",
" ('పైబడిన', 72048)]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Identify the most common tokens and numericalizing the text\n",
"freq = Counter(p for o in tok_trn for p in o) \n",
"freq.most_common(25)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Vocabulary limits used to drop rare words\n",
"max_vocab = 60000\n",
"min_freq = 5\n",
"\n",
"# itos (index -> string) is the list of all the strings in the vocab:\n",
"# index 0 is the unknown-token marker, index 1 the padding marker, followed by those of the\n",
"# max_vocab most common tokens that occur strictly more than min_freq times.\n",
"itos = ['_unk_', '_pad_'] + [\n",
"    token for token, count in freq.most_common(max_vocab) if count > min_freq\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"60002"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# creating a index-key dictionary for our vocabulary\n",
"stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)})\n",
"len(itos)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# creating an index representation for our train and validation dataset:\n",
"# every token is replaced by its vocabulary index looked up in stoi\n",
"# dtype=object: documents differ in length, so each row stays a plain Python list\n",
"# (recent NumPy releases refuse to build such a ragged array implicitly)\n",
"trn_lm = np.array([[stoi[o] for o in p] for p in tok_trn], dtype=object)\n",
"val_lm = np.array([[stoi[o] for o in p] for p in tok_val], dtype=object)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# saving our indexed representation of our dataset to disk\n",
"# we also save the index-word mapping to retrieve the complete text representation from these numpy arrays\n",
"np.save(LM_PATH/'tmp'/'trn_ids.npy', trn_lm)\n",
"np.save(LM_PATH/'tmp'/'val_ids.npy', val_lm)\n",
"# `with` flushes and closes the pickle file right away instead of leaving the handle open\n",
"with open(LM_PATH/'tmp'/'itos.pkl', 'wb') as f:\n",
"    pickle.dump(itos, f)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Loading the indexed representation of our dataset from disk\n",
"# we also load the index-word mapping to help us convert the indexes back to words, if need be.\n",
"# allow_pickle=True: the id arrays hold one list per document (object dtype), which\n",
"# np.load refuses to read by default since NumPy 1.16.3.\n",
"# Only load files written by this notebook: unpickling untrusted data can run arbitrary code.\n",
"trn_lm = np.load(LM_PATH/'tmp'/'trn_ids.npy', allow_pickle=True)\n",
"val_lm = np.load(LM_PATH/'tmp'/'val_ids.npy', allow_pickle=True)\n",
"with open(LM_PATH/'tmp'/'itos.pkl', 'rb') as f:\n",
"    itos = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(60002, 52100)"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# checking vocabulary size\n",
"vs=len(itos)\n",
"vs,len(trn_lm)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model Setup"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# ! wget -nH -r -np http://files.fast.ai/models/wt103/\n",
"# mv models/ {LM_PATH}"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# embedding size, hidden units per layer, number of layers\n",
"em_sz,nh,nl = 400,1150,3\n",
"\n",
"# location of the pretrained wt103 files under LM_PATH\n",
"PRE_PATH = LM_PATH/'models'/'wt103'\n",
"PRE_LM_PATH = PRE_PATH/'fwd_wt103.h5'\n",
"\n",
"# vocabulary of the pretrained wt103 model; `with` closes the pickle file after reading\n",
"with (PRE_PATH/'itos_wt103.pkl').open('rb') as f:\n",
"    itos2 = pickle.load(f)\n",
"# reverse lookup (string -> index); -1 marks tokens absent from the wt103 vocabulary\n",
"stoi2 = collections.defaultdict(lambda:-1, {v:k for k,v in enumerate(itos2)})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# we train from scratch so these are unused\n",
"# wgts = torch.load(PRE_LM_PATH, map_location=lambda storage, loc: storage)\n",
"\n",
"# enc_wgts = to_np(wgts['0.encoder.weight'])\n",
"# row_m = enc_wgts.mean(0)\n",
"\n",
"# wgts['0.encoder.weight'] = T(new_w)\n",
"# wgts['0.encoder_with_dropout.embed.weight'] = T(np.copy(new_w))\n",
"# wgts['1.decoder.weight'] = T(np.copy(new_w))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Language Model"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"wd=1e-7\n",
"bptt=70\n",
"bs=52\n",
"opt_fn = partial(optim.Adam, betas=(0.8, 0.99))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"em_sz,nh,nl = 400,1150,3"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"trn_dl = LanguageModelLoader(np.concatenate(trn_lm), bs, bptt)\n",
"val_dl = LanguageModelLoader(np.concatenate(val_lm), bs, bptt)\n",
"md = LanguageModelData(PATH, 1, vs, trn_dl, val_dl, bs=bs, bptt=bptt)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15])*0.7 # if you're overfitting, increase this. Underfitting? decrease this."
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"learner= md.get_model(opt_fn, em_sz, nh, nl, \n",
" dropouti=drops[0], dropout=drops[1], wdrop=drops[2], dropoute=drops[3], dropouth=drops[4])\n",
"\n",
"learner.metrics = [accuracy]\n",
"learner.unfreeze()"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6f8bf3e319054d1b95146542e461a3dd",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 70%|███████ | 3969/5657 [13:44<05:50, 4.81it/s, loss=14.3]"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEOCAYAAACEiBAqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xd4XOWZ9/HvPRr1bkmWbbn3bmyLYgOmmA4hdAKEBQJxWJJA2E0hYd8EkhDIJrskCwngUEMoWUwvoW7AxhW54YqN5YLkJsnqXZr7/WNGsiyrWzNnyv25Ll2aOXNmzu9opLn1nOec5xFVxRhjTORyOR3AGGOMs6wQGGNMhLNCYIwxEc4KgTHGRDgrBMYYE+GsEBhjTISzQmCMMRHOCoExxkQ4KwTGGBPhrBAYY0yEczsdoCcyMzN15MiRTscwxpiQsnr16mJVzepuvZAoBCNHjiQvL8/pGMYYE1JEZHdP1rNDQ8YYE+GsEBhjTISzQmCMMRHOCoExxkQ4vxUCEXlSRA6KyMY2y64UkU0i4hGRXH9t2xhjTM/5s0XwNHBeu2UbgcuAxX7crjHGmF7w2+mjqrpYREa2W7YFQET8tdkjbN1fQcGhWrzb9H4BCK03Wr+1ZGpJ1nbdlucKgkvA5ZLW54iAS7zLW9Z1tVse5RLcLhdRUYLbJUS5hOh2990uCdjPxRhj2gqJ6wj66rkVe3h2RY9Oow0KLsFbMHyFwVsoXIeLha9wJMa6SYxxkxjrJik2yvfdTUKMm8TYKJJivY+lxkczJC2OwanxJMaG9VttjDkGQfvpICILgAUAw4cP79Nr3Hr6GK7KHYaiqHqX+b6hvgUKrY+1PNp+XY9HUcCjCgoe9d5uWabqfX2Pel+35bsCzR7Fo0pTs9LsURo9Hpo9h+83eZRmj8f3XVu/NzZ7Dt9vPrxeQ7OH6vpmquubKCyrpbq+ier6Jqrqm6hv8nT6s0hLiGby4BTmjc/i1HGZTB6cYi0QY4JYUWU9723azxkTB5KTFu/XbQVtIVDVhcBCgNzcXO1m9Q7lpMX7/QcYTBqbPdTUN1PV0ERNfRNltY3sLaulsKyWgtJa1uwu5YF/bOWBf8DUnBTuv3Q604amOh3bGNOBPYdq+I/XNvLMt06I3EJgei86ykVqgovUhOhO1zlYUcdHWw/y4AfbuPyRZfzlhlxOG9/tUCTGmABrOWoRiHa7P08ffQFYDkwQkQIRuVlELhWRAmAO8LaIvOev7ZuODUyJ45oThvP+nfMYlZnIf7y2gWZPnxpcxhg/avmrdAXgEK4/zxq6ppOHXvXXNk3PpSXEcPv8cXz3+TUs31HCKeMynY5kjGnD4/sHLRBdeXZlcQQ7fUIWUS5h5c4Sp6MYY9ppaRFYITB+lRjrZtLgZD7bdcjpKMaYdjytfQT+rwRWCCJc7ogBrN1TRn1Ts9NRjDFt+ZoELmsRGH+bOyaD+iYP6/aUOR3FGNNGyzkcgbjexwpBhDtxVAYisGyH9RMYE0zU1ySwFoHxu9SEaKYOSWV5vhUCY4LJ4RaB/7dlhcAwd0wGa/eUUttg/QTGBIvWC8rs0JAJhJPGZNDYrOTttrOHjAkWLWOehfSVxSZ0HD9yAG6XsNz6CYwJGof7CKxFYAIgKdbNjGFp1mFsTBDx+AYTtj4CEzBzx2SwobCcyrpGp6MYYwjsWENWCAwAc0Zn0OxRu8rYmCDRemWxtQhMoMwakU6M28WyL+3wkDHBoOWsIWsRmICJi45i9vB0u57AmCDhaR1iwgqBCaA5YzLYvK+CspoGp6MYE/E8alcWGwfMHZOBKqzIt34CY5xmYw0ZR0wfmkZCTBTLdxQ7HcWYiKfh0CIQkSdF5KCIbGyzbICIfCAi233f0/21fdN7MW4Xc8dk8I+N+21YamMc5gmTzuKngfPaLbsL+EhVxwEf+e6bIHLD3JEcrKzn9bV7nY5iTERruaAspAuBqi
4G2h9s/jrwjO/2M8Al/tq+6ZtTxmYycVAyTy7d2do0NcYEXjhfR5CtqvsAfN8HBnj7phsiwo1zR7J1fyVrbLIaYxxjcxYDIrJARPJEJK+oqMjpOBHlohlDiHW7eGNdodNRjIlY4XxB2QERGQzg+36wsxVVdaGq5qpqblZWVsACGu8gdPMnDeTtDftp9tjhIWOcEM4XlL0B3OC7fQPweoC3b3rowmlDKK6qt6GpjXFIWFxQJiIvAMuBCSJSICI3Aw8AZ4vIduBs330ThOZPGkhyrJtX1hY4HcWYiBTIC8rc/nphVb2mk4fm+2ubpv/ERUdxwbTBvPn5Xn59SRMJMX77VTHGdCAsLigzoe/SWTnUNDTz/qYDTkcxJuJ4POHbWWxCyAkjB5CTFs+ra+3sIWMCLZw7i00IcbmES2YOYcn2Ig5W1jkdx5iI0npBWQA+pa0QmC5dOjMHj2JDThgTYC0X9gegi8AKgena2IHJnDByAI8t3kGFzWdsTMAo1kdggsjPvzaZkuoG/vDBdqejGBMxrI/ABJWpOalcc8Jwnlm+iy/2Vzodx5iIEM6DzpkQ9aNzJpAU6+aeNzbZqKTGBIBai8AEm/TEGH547gSW55fwzob9TscxJuwdvo7A/9uyQmB67NoThjN5cAr3vb2ZmoYmp+MYE9asj8AEpSiX8MuvT2FveR13v7rRDhEZ40fWR2CCVu7IASyYN5pX1xbyms1XYIzfqCoigRl0zgqB6bW7zpvIjKGpPPCPrVTV2yEiY/zBo4E5LARWCEwfuFzCPRdP4WBlPef/cTFfHapxOpIxYcejGpCrisEKgemjmcPTeeiamRRV1nPt4yvsqmNj+pliLQITAi6aPoTnbjmJwtJabnxyFflFVU5HMiZseHx9BIFghcAck9kj0nnomll8sb+Scx5czLsb7RoDY/qDhnsfgYjcISIbRWSTiPzAiQym/1w4fTDv3HEqo7MS+dFL69lfbkNWG3OsPB4NyMVk4EAhEJGpwLeBE4AZwEUiMi7QOUz/GpGRyGPX59LQ7OHyR5axs7ja6UjGhLRwP2toErBCVWtUtQn4BLjUgRymn43KTOTFBSdR3dDEJX9ayrIvi52OZEzICvc+go3APBHJEJEE4AJgmAM5jB/MHJ7Oq7edTHKcm5ue/oxtB2y0UmP6QlVxBejYUMALgapuAX4LfAC8C6wHjroqSUQWiEieiOQVFRUFOKU5FqMyE3n1tpNJinVz+wtrqWtsdjqSMSEn3A8NoapPqOosVZ0HHAKOmvFEVReqaq6q5mZlZQU+pDkmWcmx/P7KGWzdX8m9b252Oo4xISfsLygTkYG+78OBy4AXnMhh/OuMiQP5zmmjeWHVHj7bdcjpOMaEFI8GZpwhcO46gpdFZDPwJvBdVS11KIfxs9vPHMfA5FhufXY1ByvttFJjeko1jE8fBVDVU1V1sqrOUNWPnMhgAiMx1s1fbz6Byvomvv/8WhqaPE5HMiYkNHsUd7h2FpvIM3FQCr++ZCordx7iP9/d6nQcY0JCs0eJirJCYMLIVbnD+OZJw3li6U5W77b+AmO60+RR3K7AfERbITABc9f5kxiSGs93n1vL3rJap+MYE9SaPUqUHRoy4SYp1s2frptFdX0TVy9cbmMSGdOFJo/H+ghMeDpuWBrP3nIipdWNXPuXFRyssGJgTEesRWDC2nHD0nj6puPZX1HHHS+uo6nZziQypr0mO2vIhLvckQO49+IpLM8v4brHV1JW0+B0JGOCirUITES4MncY/3n5dFbvLuWqx5ZTVFnvdCRjgkZTs501ZCLEVccP4683n8DukhrufXOT03GMCRrNHiVAdcAKgXHe3DGZLJg3mrc+38eLq/Y4HceYoOA9a8haBCaC3HraGE4YOYCfv7HJxiQyBusjMBEoMdbNb6+YTmOzh6eX7nI6jjGOa1Y7a8hEoFGZiVw4bTBPfLqTrw7VOB3HGEc1NVuLwESouy+chAIPfrjN6SjGOKrZo7
ht0DkTiQanxnPzKaN4ZU0hK/NLnI5jjGO8fQTWWWwi1O1njmNQShwPvLsVVXU6jjGOsCuLTUSLj4nizrPHsXZPGW+s3+t0HGMcYWcNmYh3+ayhTMhO5ocvrWeFHSIyESjsRx8VkTtFZJOIbBSRF0QkzokcJni5o1w8/+0TGZaewI8XfU6jDUxnIoz3yuIwLQQikgPcDuSq6lQgCvhGoHOY4JeRFMvdF05iz6Eanlq60+k4xgRUfZOHWHd4dxa7gXgRcQMJgB0INh06c+JAzpiQxUP/96WNUGoiircQRAVkWwEvBKpaCPwe2APsA8pV9f3264nIAhHJE5G8oqKiQMc0QUJE+Mn5E6mqb+LRT/KdjmNMQKgqDeHcIhCRdODrwChgCJAoIt9sv56qLlTVXFXNzcrKCnRME0QmDkrhkuNyeGrpTvaU2BXHJvzVN3n7xGKjw7QQAGcBO1W1SFUbgVeAuQ7kMCHkx+dNIMol/PKtzXZtgQl79Y2+QhCuh4bwHhI6SUQSRESA+cAWB3KYEDI4NZ475o/jwy0HeCmvwOk4xvhVqa8/LD0hOiDbc6KPYCWwCFgDbPBlWBjoHCb03HLqaE4aPYD73tlCeU2j03GM8ZviKu9sfRlJsQHZniNnDanqL1R1oqpOVdXrVdXmKDTdinIJv/jaFCrqGnn4n9udjmOM3xRXeVsEGYkxAdmeXVlsQsqkwSlcOXsozyzbbR3HJmy1DMM+ODUw19paITAh59/O9nYc/3DRerbur3A6jjH9bkNhOYNS4sL70JAxx2JQahy/vmQqa/eUcsEfl/C797bS0GRDUJjwsfarUmaNSAvY9qwQmJB0+eyhLLtrPlfMHsqf/rmDbz6+kmaPnVZqQt/aPaV8daiW44ZZITCmW1nJsfznFTP4zaXTWLXrEL95x85CNqHvbyv2kBzn5hsnDA/YNt0B25IxfnLticPZdqCSJz7dydD0eG46eZTTkYzpk0PVDby5fi+XzcohJS4w1xCAFQITJv7fRZMpLKvl3jc3U1bTyJ1nj3c6kjG99traQhqaPQH/Z6ZHh4ZE5A4RSRGvJ0RkjYic4+9wxvRUlEt4+NqZXDR9MP/zf9vZUFDudCRjeu2l1QVMH5rKhEHJAd1uT/sIvqWqFcA5QBZwE/CA31IZ0wex7ih+c9k0MhJjuenpVXxeUOZ0JGN6bGNhOVv2VXDl7KEB33ZPC0HLNDkXAE+p6vo2y4wJGilx0fzh6uMA+MbCFby/ab/DiYzpXnV9E7c9t4b46CgunpET8O33tBCsFpH38RaC90QkGbATt01QOmVcJu/cfirDBySw4NnVzLn/I7YfqHQ6ljEdWrytiCm/eI89h2r476tmkBqggeba6mkhuBm4CzheVWuAaLyHh4wJSgNT4nj9eyfzswsmsq+8jn9/aT21Dc1OxzLmCAcr6/jJy58DcOH0wZw/bbAjOXpaCOYAX6hqmW8Smf8ArDfOBLVYdxQL5o1h4fWz2VhYzlWPLWdHUZXTsUwY8PTDxYtNzR5u+9saDlbW88ptc/nTtbP6IVnf9LQQPALUiMgM4MfAbuCvfktlTD86Z8ogHvnmbArLarn6sRWszC8hv6jqiAluymoaKCyrpbzWhrc2XXvy051M/Pm7nP/HJX065JhfVMWPF61n7N3/IG93KbecOopZw9P9kLTnpCezPYnIGlWdJSI/BwpV9YmWZf6PCLm5uZqXlxeITZkw9uXBSr6xcEXrEL9JsW7iY6JIjIliV5uRTE8aPYCfnDeRmR38cdY1NhMX3btZo/aW1fLMsl00NitjByZxzQnD8M7JZELNmj2lfGPhCnLS4imvbaSxycPLt81lfHYydY3NuESI6WKe4Y2F5Vz2yLLW4VCGpsfz3g/m9fp3qqdEZLWq5na7Xg8LwSfAu8C3gFOBImCdqk471qA9YYXA9JfdJdV8/EURIvDlwSrW7Cklv6ia86YOYuawNPKLq3lq6S4Azp2SzXdOG0OzRymtbu
B3733B9oNVpMZHc8PckWSnxJKRGMOp47JIjO342swPNh/g9hfWUtt4uH/iqZuO54wJAwOxu6YfNXuU8/+4mOr6Zl7/3smUVDVwxSPLaPR4mD8pm1U7D9HY7GFwajwHK+oor23E5RJuP3Mst542hs37Krj44aVkJsXyxA25TB+aiiq4XP77p6C/C8Eg4FrgM1VdIiLDgdNVNSCHh6wQGH9RVTzqvSCtRX5RFX/8aDuvr9t7xLrDBsQzf2I2Ty/bdcTy5Fg3P71g0lH/6W8oKOfyR5cxJiuJhdfPpry2kZue/oyiynqev+VE5o7N9Ou+mf71+rpC7nhxne/CxSGA9x+LS/60lNKaRgalxHGopoGGJg8Dk2OprGs64h8AEVCFF759EnPGZAQkc78WAt8LZgPH++6uUtWDfQw2Afh7m0WjgZ+r6h86e44VAuOEF1ft4Znlu0mJczM0PYFfXTKFhBg3dY3N7C6pobHZw9o9pdz75maaPMrJYzMYn51MlAg7i6v5aOtBslNi+ccd8xjgm2lqY2E5Fz30KSKQOyKdwtJaxmYnc8spo1i18xATBye3fsiY4HLxw59S09DM+z+Yd8R/8V8dqmFncTWnjss86pBfXWMzP3t1A6+sKWTumAzuu3QaozITA5a5v1sEVwG/Az7GeyHZqcCPVHXRMYaMAgqBE1V1d2frWSEwwayhycMjH+/g0U92HPEf4JQhKdx/2TSmDz1yOOHiqnp+8fomiqrqyS+qau2zAHC7hNX/72xS4wN/Lrnp3OcFZVz88FLuvXgKN8wd2evnl1TVk54Q49fDQB3p70KwHji7pRUgIlnAh6o64xhDngP8QlVP7mo9KwQmFKgqzR5lX3kdLpeQkxbfo+et2VPKruJqolzCHS+u46Lpg3nYwVMJzdF+vGg9b67fx8q75wd0VNBj1dNC0NPRR13tDgWV0D9zGXwDeKEfXscYx4kI7ihh2ICEXj1v1vD01tMHN+2tYOHifM6butcOEQWJ8ppG3li/l0tnBnZo6EDq6Yf5uyLynojcKCI3Am8D7xzLhkUkBrgYeKmTxxeISJ6I5BUVFR3LpowJGT88ZwLjBibx67e2UNPQ5HQcAyxaU0Bdo4dvnjTC6Sh+06NCoKo/AhYC04EZwEJV/ckxbvt8YI2qHuhkmwtVNVdVc7Oyso5xU8aEhhi3i/svm8b+ijoe/STf6TgRr6nZw5Of7mTW8DSmDEl1Oo7f9HhiGlV9GXi5H7d9DXZYyJij5I4cwMUzhvDYJzu4KncoQ9N7d6jJ9J/l+SUUltVy94WTnI7iV122CESkUkQqOviqFJGKvm5URBKAs4FX+voaxoSzu86fiAjc88Zmp6NEtNfW7iU51s2ZE8P7AsAuC4GqJqtqSgdfyaqa0teNqmqNqmaoqg1cZ0wHhqTFs2DeGD7ccoBlXxY7HSciqSqfbCvizEkD/TYERLDojzN/jDF+cNvpY0iOdfP8qj1OR4lIO4qqKK6qZ26ArgJ2khUCY4JUXHQU504dxJLtxa2DlJnA2VjoPfo9Y1haN2uGPisExgSxsyYNpLy2kedWdnrhvfGTLfsqiIlyMSYryekofmeFwJggds7kQZw2Pov739nK1v0VlFTVOx0pYmzeV8H4QUlER4X/x2T476ExIczlEn59yVQamz2c94clzH3g/1i0uoDGZpsy3J9Ulc17K5g0qM/nxIQUKwTGBLlhAxL4+dcmEx0leFT54UvruaLN5Cam/5VUN1BS3cCEQclORwkIKwTGhIB/mTOS7fddwKZ7z+NrM4awvqCcRz/Z4XSssJVfVA3A2IHh3z8Avbiy2BjjvBi3iz9cfRy1DU387r0vOFBRx51njSfdN9+B6R87iqoAIqKjGKxFYEzIiXIJf75uNpfOzOGvy3cz81cfsGS7DczYn/KLqoh1u3o8lHios0JgTAiKcbt48OrjePxfcslIjOEXr2+yDuR+tKOomlGZiQGfSMYpVgiMCWFnTc7mgcunk1
9czdce+pTVu0udjhQWdhRVMSZC+gfACoExIe+sSQP51SVTKSit5ceL1uOxs4mOSUOTh68O1TAmgHMLO80KgTEhTkS4/qQR3HfpVHYUVXPWg5+wsdDGc+yrfeW1eJRezzQXyqwQGBMmLp4xhJ9dMJFdxdXc8kwe1fU2w1lfFJTWAkTUPBBWCIwJEyLCgnlj+N/vzGF/RR2PfGzXGfRFYWshiIwzhsAKgTFhJ3fkAC6aPpgnl+60M4n6oKC0BpfAoNQ4p6MEjBUCY8LQ+VMHU9PQzAbrK+i1grJaBqXERcRgcy0c2VMRSRORRSKyVUS2iMgcJ3IYE67mjskgyiW8vrbQ6Sghp6iynqyUyGkNgHMtgj8C76rqRGAGsMWhHMaEpfTEGK6YNZRnlu9m24FKp+OElJKqBjIjbMiOgBcCEUkB5gFPAKhqg6qWBTqHMeHujrPGAbDU5jzulZLqejKSrBD422igCHhKRNaKyOMiEjlXbhgTIEPS4hmYHMuGAusn6CmPR70tgqRYp6MElBOFwA3MAh5R1ZlANXBX+5VEZIGI5IlIXlGRDahlTF9MH5rKugJrcPdURV0jTR4lwwqB3xUABaq60nd/Ed7CcARVXaiquaqam5WVFdCAxoSLmcPTyS+qprym0ekoIaG4qgGATDs05F+quh/4SkQm+BbNBzYHOocxkWDmsDQA1luroEeKfXNC26GhwPg+8JyIfA4cB/zGoRzGhLXpw9IQgTV7bFTSnijxtQgirbPYkRnKVHUdkOvEto2JJEmxbkZnJrJ5b4XTUUKCtQiMMWFp4uAUtu63awl6orTG2yJIT4isFoEVAmPC3KRByew5VENlnXUYd6eqromEmCiiImRmshZWCIwJc5MGpwDYFcY9UFXfRFKsI0fMHWWFwJgwN9FXCLbss0LQncr6JpLirBAYY8LMkNQ4UuLcbNlnHcbdqa5vItlaBMaYcCMiTBycYtNX9kBVXROJVgiMMeHolLGZrC8o550N+5yOEtSsj8AYE7YWzBtNTlo8z6/c43SUoFZZZ30ExpgwFRcdxcXHDeHTL4t5ZU2B03GCVnWD9REYY8LYraeNAeCVNTZrWUdU1foIjDHhLTU+mmtPHM76gjI8HnU6TtCpb/LQ5FE7NGSMCW/HDU2jsq6Jj7YedDpK0KmqbwKwQ0PGmPB2wfTB5KTF89TSnU5HCTpVdd5CYIeGjDFhLSnWzfxJA1n3VRlNzR6n4wSVlhaBnT5qjAl7s0ekU9PQHNYjkn64+QBPLd2Jas/7Qip9LQLrIzDGhL3ckQMAyNt1yOEk/vODv6/j3jc3M+qn71BR10i177/9rlS39hFE+zte0LFCYEyEyUmLZ1RmIve8uZkdRVVOx+l3JVX1rYd5AKbf8z65v/6Q/G72teU5ibFRfs0XjBwpBCKyS0Q2iMg6EclzIoMxkexbJ48E4MlPw6/T+LvPrwFg0a1zOGvSQABqG5t5ZtmuLp9XWR+5h4ac3OMzVLXYwe0bE7GunzOSV9cW8tzKPZw0OoOvzRjidKR+UVJVz4r8Q1w4fTCzhqfz6Ddn41G48alVrO5m3mY7NGSMiTiXzMwB4PsvrKW+qdnhNP3jc98Iq988cQQul+COchHjdjFreDpb9lXy/qb9Rxw2aquqrgmXQFx05H0sOrXHCrwvIqtFZIFDGYyJaNefNII/XzcLgA82H3A4Tf/YvNc758LUnJQjls8cnkazR1nw7Gruf2dLh89tGXlUJLKmqQTnCsHJqjoLOB/4rojMa7+CiCwQkTwRySsqKgp8QmPCnIhw7pRBZKfE8vbn4TE89f7yOtISokmOO/Lwzszh6a23N+7teIKeyrqmo54XKRwpBKq61/f9IPAqcEIH6yxU1VxVzc3Kygp0RGMiQpRLOHVcFivyS3p1zn2wKqmuJyMx5qjlAxJjeOKGXOaMzmD7gUo8HmVjYTkHKupa16mO0LkIwIFCICKJIpLcchs4B9gY6BzGGK/jhqVRWtNIQW
mt01GOWXFVAxlJsR0+Nn9SNpfNyqGmoZl/bNzPRQ99ytWPLW/tH6mqb4rIU0fBmRZBNvCpiKwHVgFvq+q7DuQwxgAzhqYB8FYYHB4qqaonM+noFkGLqTmpwOFTTHeV1LQeFvNOXG+HhgJCVfNVdYbva4qq3hfoDMaYwyYNTua4YWksXLyDhqbQHn+opLqBjMSOWwQAE7KTGTYgHoDvnjGGjMQYlmz3nsVeVdcYkSOPgrPXERhjgoA7ysW/nj6G7zy7mrxdh5g7NtPpSH3S2OyhrKaRjC5aBC6XsOjWubhEyEqOZc+hWj79shhVpby2iZT4yPxIjLwTZo0xRzllbCZx0S6ufXwl1z2+guU7SpyO1Gul1Q0AnfYRtMhOiSMr2bvOqeMyKaqs59kVuymuqiezm+eGKysExhgSY93cdvpYAJZ+WcI1f1kRcmcRFVd5C0FmB2cNdebcyYNIT4jm569vAuDiMLnCuresEBhjAPj+mWN5+qbjW++H2llEJdX1QPctgrZSE6J543unkJ0Sy7+fPZ5x2cn+ihfUrBAYYwDvBWanTxjI6989GYCNvuEaQkVJVcuhoZ63CACGDUhg2V3z+f78cf6IFRKsEBhjjjBhUDJul7AhxApBcZW3RZDZxVlDnYlyRd6wEm1ZITDGHCEuOopx2ckhVwhKqhuIjpKIPfPnWFghMMYcZVpOCku2F1Pi+y87FBRX1pORGBuRg8YdKysExpijnDN5EAAvfvYVqspLeV+xtyy4O49Lqht63T9gvKwQGGOOctbkbCZkJ7Miv4RNeyv40aLP+dmrG5yO1aWiynoG9OLUUXOYFQJjTIdOHD2AJduL+cHf1wGHL9gKRg1NHrYdqGR8hJ7+eaysV8UY06HrThzBX5fv5suD3knfK+uOnNlr+4FKXl+3lw2F5QxKiePXl04lOsqZ/y237KugvsnDrDbzDpies0JgjOnQhEHJrPjpfB76v+1sLCxn094Kmj1KlEs4WFHH2Q8uPmL94RkJfPeMsV2+Zt6uQ/zs1Q3ccsporjp+WL9lXeubj3jm8LR+e81IYoeGjDGdGpQax32XTuPK3GE0eZSiSu9ZRC1DMgCMzEggJc7NIx/vYP1XZRysrOvwtVSVHy/6nG0CjAbcAAAObklEQVQHqnj4n1/22xAWa/eUcs+bm0mIiWJIWny/vGaksRaBMaZbOb4P2MKyWtISovl420FumDOC6+eMICspjt2Hqrn44aV8/U9LiXW72HTvubjbHSb64kAl+cXVzBiayvqCclbtPMTwjAQGp/b9w/ufXxzkpqc+A+DGuSP7/DqRzloExphutfynvbesluU7Sqhr9HDmpGzGDkwmNSGaaTmpzBvvnVK2vsnDqp2HWp/b2Ozhjx9u57w/LAHgv66aQXx0FFcvXMEpv/0nXx2q6XOuf//f9YB3noE7zx7f59eJdFYIjDHdGjYgHrdLWLOnlA+2HCAxJoqTRg9ofVxEeOam49l077mIwMo2heChj7bz4IfbABg7MImxA5M5f5r3OoVmj/L6usJe5/ls1yF2FFW1Hl569PrZjnVUhwM7NGSM6VZCjJvZI9J5Ka+AuOgoTpuQRaz7yPl9RYTEWDcjBiSw/WBl6/LFvhnA/nTtLGaP8J7Vs2DeaIoq61myvZg1e8p6laWmoYkrH13eev/+y6YxKjOxr7tmcLBFICJRIrJWRN5yKoMxpue+flwOVfVNFFfVc9ak7E7XG5edzBf7vYXgtbWFrPuqjNvnj+PC6YMZlBoHwMRBKTx784lclTuUtXtKe9Vx3PLaLc6bMqgPe2PacrItdQewxcHtG2N64bJZOa23z5gwsNP1JmQns6ukhs92HWq9GO1bJ4/scN1Zw9MprWnkp6/0/KrlHUXVAJwwcgD3XTqVdLua+Jg5UghEZChwIfC4E9s3xvReXHQUT914PL+/ckaXH75nTMyi2aNc+ehyEmOiWHbXmaQldLz+WZO9LYsXP/uKbQcqefCDbeQXVX
WZY0dRFdFRwnPfPpHrThzR9x0yrZxqEfwB+DHg6WwFEVkgInkikldUVBS4ZMaYTp0xcSBXzB7a5TqzRwxg7pgMAP5l7sguz+3PTIrlgzvnAXDOg4v540fb+cnLn3f5+pv2VjAmK8k6h/tRwH+SInIRcFBVV3e1nqouVNVcVc3NysoKUDpjTH/4r6tm8NvLp/HvPTilc0xW0hH3P9tVyp6Sjk8pVVU2FJQxY6hdQdyfnCipJwMXi8gu4EXgTBH5mwM5jDF+Mjg1nquPH37URWUdcbmEB6+ewb+dPZ43vuedJvO9Tfs7XPfzgnJKaxqZPdLGFOpPAT99VFV/CvwUQEROB36oqt8MdA5jTPC4dObhw01Tc1J4/NN8/mXuiKNOUf3bit0kxbo5d7KdKdSf7CCbMSaofPvU0RyoqGfb/iM7javrm3h3037OnTKI1IRoh9KFJ0cLgap+rKoXOZnBGBNcJg9OASC/+MhC8PSyXVTWNXHdScOdiBXWrEVgjAkqOenes4wKSg9PjenxKH9bsZvTxmfZnAN+YIXAGBNUEmLcpCdEHzFH8vqCMvaV1x1xUZvpP1YIjDFBZ0haPIVtCsES33hFp4zNdCpSWLNCYIwJOjlp8RT6Dg1t3lvBf3+wjSlDUshIinU4WXiyQmCMCTrjspPYWVxNaXUDb6zfC8DvrpjhcKrwZYXAGBN0zpyYTZNHWbajhI+/OMic0RlMHpLidKywZYXAGBN0ZgxNJSEmijfWF7J1fyWnT7BhZvzJCoExJui4o1xMGZLCe5sOAHDmxM6HvTbHzgqBMSYoff/Mcd65iM8az7jsZKfjhDWbqtIYE5Tmjc9i3ng7JBQI1iIwxpgIZ4XAGGMinBUCY4yJcFYIjDEmwlkhMMaYCGeFwBhjIpwVAmOMiXBWCIwxJsKJqjqdoVsiUg5sb7MoFSjv4nbL90yguI+bbfu6vX28/WNd3e/udl/3obv8Xa3T0fLOcnb2mJPvQXf529/vKj8Ex3vgxO9QV/m6e7w/3wP7O+7bOqlAmqp2f1Weqgb9F7Cws/sd3W7zPa+/ttmbx7vK25P8/bEP3eXvap2OlneWMxjfg+7y9+Z3KFjeAyd+h4LlPbC/4/75HerqK1QODb3Zxf2Obrdfvz+22ZvHu8rb/n5PbvdFT57f2TodLe8qW7C9B93lb3/fqfxdrRMMv0M9eY1Qfw8i7e+4QyFxaKivRCRPVXOdznEsQn0fQj0/hP4+WH7nBfs+hEqLoK8WOh2gH4T6PoR6fgj9fbD8zgvqfQjrFoExxpjuhXuLwBhjTDesEBhjTISzQmCMMREuYguBiJwqIo+KyOMisszpPL0lIi4RuU9EHhKRG5zO0xcicrqILPG9D6c7nacvRCRRRFaLyEVOZ+kLEZnk+/kvEpF/dTpPb4nIJSLyFxF5XUTOcTpPX4jIaBF5QkQWOZUhJAuBiDwpIgdFZGO75eeJyBci8qWI3NXVa6jqElW9FXgLeMafedvrj/zA14EcoBEo8FfWzvTTPihQBcQR4H3op/wAPwH+1z8pu9ZPfwdbfH8HVwEBPb2xn/K/pqrfBm4ErvZj3A710z7kq+rN/k3ajb5esefkFzAPmAVsbLMsCtgBjAZigPXAZGAa3g/7tl8D2zzvf4GUUMsP3AV8x/fcRaH4HgAu3/OygedCMP9ZwDfwfghdFIrvge85FwPLgGtDMb/vef8FzArV98D3vID/Hbd8heTk9aq6WERGtlt8AvClquYDiMiLwNdV9X6gw2a7iAwHylW1wo9xj9If+UWkAGjw3W32X9qO9dd74FMKxPojZ2f66T04A0jE+0deKyLvqKrHr8Hb6K/3QFXfAN4QkbeB5/2X+Kjt9sd7IMADwD9UdY1/Ex+tn/8OHBOShaATOcBXbe4XACd285ybgaf8lqh3epv/FeAhETkVWOzPYL3Qq30QkcuAc4E04GH/RuuRXuVX1bsBRORGoDiQRaALvX0PTgcuw1uI3/Frsp7p7d
/B9/G2zFJFZKyqPurPcD3U2/cgA7gPmCkiP/UVjIAKp0IgHSzr8mo5Vf2Fn7L0Ra/yq2oN3kIWTHq7D6/gLWjBote/QwCq+nT/R+mz3r4HHwMf+ytMH/Q2//8A/+O/OH3S230oAW71X5zuhWRncScKgGFt7g8F9jqUpS9CPT+E/j6Een4I/X0I9fwQgvsQToXgM2CciIwSkRi8nXhvOJypN0I9P4T+PoR6fgj9fQj1/BCK++BUL/Ux9tS/AOzj8KmTN/uWXwBsw9tjf7fTOcM1fzjsQ6jnD4d9CPX84bIPqmqDzhljTKQLp0NDxhhj+sAKgTHGRDgrBMYYE+GsEBhjTISzQmCMMRHOCoExxkQ4KwSm34lIVQC2cXEPh4nuz22eLiJz+/C8mSLyuO/2jSISDOMqISIj2w+f3ME6WSLybqAyGWdYITBBS0SiOntMVd9Q1Qf8sM2uxt86Heh1IQB+BjzUp0AOU9UiYJ+InOx0FuM/VgiMX4nIj0TkMxH5XETubbP8NfHO7LVJRBa0WV4lIr8UkZXAHBHZJSL3isgaEdkgIhN967X+Zy0iT4vI/4jIMhHJF5ErfMtdIvJn3zbeEpF3Wh5rl/FjEfmNiHwC3CEiXxORlSKyVkQ+FJFs31DDtwJ3isg68c5wlyUiL/v277OOPixFJBmYrqrrO3hshIh85PvZfOQbFh0RGSMiK3yv+cuOWljinRntbRFZLyIbReRq3/LjfT+H9SKySkSSff/5L/H9DNd01KoRkSgR+V2b9+o7bR5+DbiuwzfYhAenL222r/D7Aqp8388BFuIdjdGFdyKOeb7HBvi+xwMbgQzffQWuavNau4Dv+27fBjzuu30j8LDv9tPAS75tTMY7FjzAFXiHVnYBg/DOe3BFB3k/Bv7c5n46tF51fwvwX77b9wA/bLPe88ApvtvDgS0dvPYZwMtt7rfN/SZwg+/2t4DXfLffAq7x3b615efZ7nUvB/7S5n4q3klQ8oHjfctS8I4wnADE+ZaNA/J8t0fim1AFWAD8h+92LJAHjPLdzwE2OP17ZV/++wqnYahN8DnH97XWdz8J7wfRYuB2EbnUt3yYb3kJ3kl2Xm73Oi1DVa/GO3Z+R15T73wAm0Uk27fsFOAl3/L9IvLPLrL+vc3tocDfRWQw3g/XnZ085yxgsnduFABSRCRZVSvbrDMYKOrk+XPa7M+zwH+2WX6J7/bzwO87eO4G4Pci8lvgLVVdIiLTgH2q+hmA+iZcEpFE4GEROQ7vz3d8B693DjC9TYspFe97shM4CAzpZB9MGLBCYPxJgPtV9bEjFnonQzkLmKOqNSLyMd55iwHqVLX9jGv1vu/NdP47W9/mtrT73hPVbW4/BPy3qr7hy3pPJ89x4d2H2i5et5bD+9adHg/8parbRGQ23sHN7heR9/EewunoNe4EDgAzfJnrOlhH8La83uvgsTi8+2HClPURGH96D/iWiCQBiEiOiAzE+99mqa8ITARO8tP2PwUu9/UVZOPt7O2JVKDQd/uGNssrgeQ2998Hvtdyx/cfd3tbgLGdbGcZ3iGKwXsM/lPf7RV4D/3Q5vEjiMgQoEZV/4a3xTAL2AoMEZHjfesk+zq/U/G2FDzA9Xjn1G3vPeBfRSTa99zxvpYEeFsQXZ5dZEKbFQLjN6r6Pt5DG8tFZAOwCO8H6buAW0Q+B36F94PPH17GOzTwRuAxYCVQ3oPn3QO8JCJLgOI2y98ELm3pLAZuB3J9naub6WCWKVXdincaxeT2j/mef5Pv53A9cIdv+Q+AfxORVXgPLXWUeRqwSkTWAXcDv1bVBuBqvFOYrgc+wPvf/J+BG0RkBd4P9eoOXu9xYDOwxndK6WMcbn2dAbzdwXNMmLBhqE1YE5EkVa0S77ywq4CTVXV/gDPcCVSq6uM9XD8BqFVVFZFv4O04/rpfQ3adZzHeyddLncpg/Mv6CEy4e0tE0vB2+v4q0EXA5xHgyl6sPxtv564AZXjPKHKEiGTh7S
+xIhDGrEVgjDERzvoIjDEmwlkhMMaYCGeFwBhjIpwVAmOMiXBWCIwxJsJZITDGmAj3/wFxorhCE7yw9wAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa10b07e4e0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#find suitable learning rates\n",
"learner.lr_find(1e-07, 1e2)\n",
"learner.sched.plot()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"lr = 1e-3\n",
"lrs = lr"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7a1e2d12bd954b3eab51080184821a40",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0%| | 5/5657 [00:01<29:30, 3.19it/s, loss=11] \n",
" 0%| | 6/5657 [00:01<28:00, 3.36it/s, loss=11]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exception in thread Thread-11:\n",
"Traceback (most recent call last):\n",
" File \"/opt/conda/envs/fastai/lib/python3.6/threading.py\", line 916, in _bootstrap_inner\n",
" self.run()\n",
" File \"/opt/conda/envs/fastai/lib/python3.6/site-packages/tqdm/_tqdm.py\", line 144, in run\n",
" for instance in self.tqdm_cls._instances:\n",
" File \"/opt/conda/envs/fastai/lib/python3.6/_weakrefset.py\", line 60, in __iter__\n",
" for itemref in self.data:\n",
"RuntimeError: Set changed size during iteration\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss accuracy \n",
" 0 3.387137 3.396025 0.547392 \n",
"\n"
]
},
{
"data": {
"text/plain": [
"[array([3.39602]), 0.547391530683386]"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learner.fit(lr, 1, wds=wd, use_clr=(32,2), cycle_len=1) # last layer is the embedding weights"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"learner.save('lm_telugu_fromscratch')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"learner.load('lm_telugu_fromscratch')"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eb366b95415c43ceac50074c42acde17",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss accuracy \n",
" 0 3.139451 3.198013 0.558166 \n",
"\n"
]
},
{
"data": {
"text/plain": [
"[array([3.19801]), 0.5581661824732331]"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learner.fit(lrs, 1, wds=wd, use_clr=(20,10), cycle_len=1)"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24.483832456834897"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# perplexity approximation\n",
"math.exp(3.198013)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9d05b9e48fff418785f767f94282e1dc",
"version_major": 2,
"version_minor": 0
},
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
"<p>\n",
" If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
"</p>\n",
"<p>\n",
" If you're reading this message in another frontend (for example, a static\n",
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"</p>\n"
],
"text/plain": [
"HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch trn_loss val_loss accuracy \n",
" 0 3.168684 3.187562 0.556568 \n",
" 34%|███▍ | 1937/5657 [06:46<13:00, 4.76it/s, loss=3.36]"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-56-4dbe5f88ace9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlearner\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muse_clr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcycle_len\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m15\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/notebooks/courses/dl2/fastai/learner.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, lrs, n_cycle, wds, **kwargs)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msched\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 251\u001b[0m \u001b[0mlayer_opt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_layer_opt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlrs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_gen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlayer_opt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_cycle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwarm_up\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/notebooks/courses/dl2/fastai/learner.py\u001b[0m in \u001b[0;36mfit_gen\u001b[0;34m(self, model, data, layer_opt, n_cycle, cycle_len, cycle_mult, cycle_save_name, best_save_name, use_clr, use_clr_beta, metrics, callbacks, use_wd_sched, norm_wds, wds_sched_mult, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0mn_epoch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msum_geom\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcycle_len\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcycle_len\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcycle_mult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_cycle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 198\u001b[0m return fit(model, data, n_epoch, layer_opt.opt, self.crit,\n\u001b[0;32m--> 199\u001b[0;31m metrics=metrics, callbacks=callbacks, reg_fn=self.reg_fn, clip=self.clip, **kwargs)\n\u001b[0m\u001b[1;32m 200\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_layer_groups\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_layer_groups\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/notebooks/courses/dl2/fastai/model.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(model, data, epochs, opt, crit, metrics, callbacks, stepper, **kwargs)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0mbatch_num\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcb\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mcb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 125\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstepper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mV\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 126\u001b[0m \u001b[0mavg_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mavg_loss\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mavg_mom\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mavg_mom\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0mdebias_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mavg_loss\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mavg_mom\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mbatch_num\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/notebooks/courses/dl2/fastai/model.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, xs, y, epoch)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mraw_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcrit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreg_fn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreg_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxtra\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mraw_loss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclip\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Gradient clipping\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclip_grad_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrainable_params_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclip\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/envs/fastai/lib/python3.6/site-packages/torch/autograd/variable.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, retain_variables)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mVariable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \"\"\"\n\u001b[0;32m--> 167\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_variables\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/envs/fastai/lib/python3.6/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(variables, grad_variables, retain_graph, create_graph, retain_variables)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m Variable._execution_engine.run_backward(\n\u001b[0;32m---> 99\u001b[0;31m variables, grad_variables, retain_graph)\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"learner.fit(lrs, 1, wds=wd, use_clr=(20,10), cycle_len=15)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"learner.save('lm_telugu_fromscratch_1_partial')"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"learner.save_encoder('adam1_enc')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"m = learner.model\n",
"# pickle.dump(m,open(f'wiki_lang.pkl','wb'))"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"SequentialRNN(\n",
" (0): RNN_Encoder(\n",
" (encoder): Embedding(60002, 400, padding_idx=1)\n",
" (encoder_with_dropout): EmbeddingDropout(\n",
" (embed): Embedding(60002, 400, padding_idx=1)\n",
" )\n",
" (rnns): ModuleList(\n",
" (0): WeightDrop(\n",
" (module): LSTM(400, 1150, dropout=0.105)\n",
" )\n",
" (1): WeightDrop(\n",
" (module): LSTM(1150, 1150, dropout=0.105)\n",
" )\n",
" (2): WeightDrop(\n",
" (module): LSTM(1150, 400, dropout=0.105)\n",
" )\n",
" )\n",
" (dropouti): LockedDropout(\n",
" )\n",
" (dropouths): ModuleList(\n",
" (0): LockedDropout(\n",
" )\n",
" (1): LockedDropout(\n",
" )\n",
" (2): LockedDropout(\n",
" )\n",
" )\n",
" )\n",
" (1): LinearDecoder(\n",
" (decoder): Linear(in_features=400, out_features=60002)\n",
" (dropout): LockedDropout(\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
    "# Prepare the trained model `m` for the text-generation helpers.\n",
    "# (Leftover alternative, unused: `TEXT` and `m` could be restored from pickles in the models directory.)\n",
    "m[0].bs=1  # batch-size attribute of the first sub-module (the RNN encoder) -> 1, presumably one prompt at a time\n",
    "m.eval()"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"def gen_text(ss,topk):\n",
" s = [tokenize(ss)]\n",
" t = TEXT.numericalize(s)\n",
" m.reset()\n",
" pred,*_ = m(t)\n",
" pred_i = torch.topk(pred[-1], topk)[1]\n",
" return [TEXT.vocab.itos[o] for o in to_np(pred_i)]\n",
"\n",
"def gen_sentences(ss,nb_words):\n",
" result = []\n",
" s = [tokenize(ss)]\n",
" t = TEXT.numericalize(s)\n",
" m.reset()\n",
" pred,*_ = m(t)\n",
" for i in range(nb_words):\n",
" pred_i = pred[-1].topk(2)[1]\n",
" pred_i = pred_i[1] if pred_i.data[0] < 2 else pred_i[0]\n",
" result.append(TEXT.vocab.itos[pred_i.data[0]])\n",
" pred,*_ = m(pred_i[0].unsqueeze(0))\n",
" return(result)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"TEXT = pickle.load(open(f'data/teluguwiki/models/TEXT.pkl','rb'))"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['<unk>',\n",
" '1',\n",
" 'బాగా',\n",
" 'మరియు',\n",
" 'కింది',\n",
" 'ఈ',\n",
" '31',\n",
" 'వస్తుంది',\n",
" 'విడుదల',\n",
" 'లేవు']"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_sentence = \"ఆయన కుటుంబం ఆయనకు వారి స్తోమత ప్రకారం వైద్యాన్ని అందించింది.\"\n",
"gen_text(test_sentence, 10)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1కేంద్రంtotototoగుర్తిస్తూఈకేంద్రంtoగుర్తిస్తూఈకేంద్రంకానిఉత్పత్తి:కేంద్రంఉత్పత్తి:?నుండి,ప్రాథమికలోజిల్లాచేసినవచ్చాయిఆగష్టునీపాఠశాలనుండిబాగాపాఠశాలమూడుకేంద్రం235అన్నికింది%ఇంటింటికీఅక్కడేకిఇంకాగంటలఆఫీసుఅలోపతిఅవసరాల./కంకర'"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"''.join(gen_sentences(test_sentence, 50))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment