bert_nimi_prediction.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "bert_nimi_prediction.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyNlHcTDS1Fbq/X7ZgfSTUPD",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/nymwa/766b0255aa40c776d9608752616c5791/bert_nimi_prediction.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"↑ push \"Open in Colab\"\n", | |
"\n", | |
"↓ run block (▶) one by one " | |
],
"metadata": {
"id": "1MaK3FCQnUC9"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "EyABZ4g-eNpA",
"outputId": "12abc96a-8274-4d96-f231-0654c4491655"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Cloning into 'ilonimi'...\n",
"Note: checking out '5224a4c36c5a636afc1c925be198d0f8c17d435a'.\n",
"\n",
"You are in 'detached HEAD' state. You can look around, make experimental\n",
"changes and commit them, and you can discard any commits you make in this\n",
"state without impacting any branches by performing another checkout.\n",
"\n",
"If you want to create a new branch to retain commits you create, you may\n",
"do so (now or later) by using -b with the checkout command again. Example:\n",
"\n",
"  git checkout -b <new-branch-name>\n",
"\n",
"HEAD is now at 5224a4c add ignore set\n",
"Cloning into 'ponask'...\n",
"Note: checking out 'fadc92ddca0422009e10d92d22aabbb468ab88dc'.\n",
"\n",
"You are in 'detached HEAD' state. You can look around, make experimental\n",
"changes and commit them, and you can discard any commits you make in this\n",
"state without impacting any branches by performing another checkout.\n",
"\n",
"If you want to create a new branch to retain commits you create, you may\n",
"do so (now or later) by using -b with the checkout command again. Example:\n",
"\n",
"  git checkout -b <new-branch-name>\n",
"\n",
"HEAD is now at fadc92d add\n",
"Downloading...\n",
"From: https://drive.google.com/uc?id=1d3XjRHEp11jAbNiSU7mRt-f8zpu3BVxK\n",
"To: /content/bert_big.pt\n",
"\r  0%|          | 0.00/153M [00:00<?, ?B/s]\r  6%|▌         | 8.91M/153M [00:00<00:04, 34.7MB/s]\r 12%|█▏        | 17.8M/153M [00:00<00:02, 53.2MB/s]\r 18%|█▊        | 26.7M/153M [00:00<00:01, 65.0MB/s]\r 29%|██▉       | 44.6M/153M [00:00<00:01, 101MB/s] \r 39%|███▉      | 59.2M/153M [00:00<00:00, 93.9MB/s]\r 46%|████▌     | 70.3M/153M [00:00<00:00, 95.3MB/s]\r 55%|█████▍    | 83.4M/153M [00:00<00:00, 104MB/s] \r 62%|██████▏   | 94.9M/153M [00:01<00:00, 106MB/s]\r 71%|███████   | 109M/153M [00:01<00:00, 113MB/s] \r 79%|███████▉  | 121M/153M [00:01<00:00, 107MB/s]\r 87%|████████▋ | 132M/153M [00:01<00:00, 93.9MB/s]\r 93%|█████████▎| 142M/153M [00:01<00:00, 90.6MB/s]\r100%|██████████| 153M/153M [00:01<00:00, 92.6MB/s]\n"
]
}
],
"source": [
"%%bash\n",
"\n",
"git clone https://github.com/nymwa/ilonimi.git\n",
"mv ilonimi ilonimi.d\n",
"cd ilonimi.d\n",
"git checkout 5224a4c36c5a636afc1c925be198d0f8c17d435a\n",
"mv ilonimi ../\n",
"cd ..\n",
"\n",
"git clone https://github.com/nymwa/ponask.git\n",
"mv ponask ponask.d\n",
"cd ponask.d\n",
"git checkout fadc92ddca0422009e10d92d22aabbb468ab88dc\n",
"mv ponask ../\n",
"cd ..\n",
"\n",
"gdown --id \"1d3XjRHEp11jAbNiSU7mRt-f8zpu3BVxK\""
]
},
{
"cell_type": "code",
"source": [
"from ilonimi import Normalizer, Tokenizer, Splitter\n",
"import torch\n", | |
"import torch\n", | |
"from torch.nn.utils.rnn import pad_sequence as pad\n", | |
"from ponask.vocab import Vocab\n", | |
"from ponask.bert import BERT\n", | |
"from ponask.batch import Batch\n", | |
"from tabulate import tabulate" | |
], | |
"metadata": { | |
"id": "gUKBYZ8kjrPH" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def load_vocab(path):\n", | |
" with open(path) as f:\n", | |
" tokens = [x.strip() for x in f]\n", | |
" vocab = Vocab(tokens)\n", | |
" return vocab\n", | |
"\n", | |
"vocab = load_vocab('ponask.d/work/vocab.txt')\n", | |
"normalizer = Normalizer()\n", | |
"tokenizer = Tokenizer(\n", | |
" convert_unk = True,\n", | |
" convert_number = False,\n", | |
" convert_proper = False,\n", | |
" ignore_set = {'<msk>'})\n", | |
"splitter = Splitter(sharp = False)\n", | |
"\n", | |
"model = BERT(len(vocab), 512, 8, 2048, 0.3, 0.2, 0.2, 12, 64)\n", | |
"model.load_state_dict(torch.load('bert_big.pt', map_location = 'cpu'))\n", | |
"model = model.cuda()\n", | |
"model.eval()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "VGD7_wGbeUT5", | |
"outputId": "9ad1bf64-a94b-40b2-f494-2406f174c009" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"BERT(\n", | |
" (embedding): TransformerEmbedding(\n", | |
" (token_embedding): Embedding(241, 512, padding_idx=0)\n", | |
" (position_embedding): SinusoidalPositionalEmbedding(\n", | |
" (embedding): Embedding(64, 512)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (encoder): TransformerEncoder(\n", | |
" (layers): ModuleList(\n", | |
" (0): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (1): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (2): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (3): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (4): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (5): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (6): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (7): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (8): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (9): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (10): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" (11): TransformerEncoderLayer(\n", | |
" (self_attn_layer): SelfAttentionSubLayer(\n", | |
" (self_attn): MultiheadAttention(\n", | |
" (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.3, inplace=False)\n", | |
" )\n", | |
" (feed_forward_layer): FeedForwardSubLayer(\n", | |
" (linear1): Linear(in_features=512, out_features=2048, bias=True)\n", | |
" (linear2): Linear(in_features=2048, out_features=512, bias=True)\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" (dropout): Dropout(p=0.2, inplace=False)\n", | |
" (activation): GELU()\n", | |
" )\n", | |
" )\n", | |
" )\n", | |
" (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n", | |
" )\n", | |
" (fc): Linear(in_features=512, out_features=241, bias=True)\n", | |
")" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
] | |
}, | |
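{
"cell_type": "markdown",
"source": [
"A quick sanity check (an editor's addition, not in the original gist): the module tree above shows a 241-way token embedding and output projection, so `len(vocab)` should match it. The special-token attributes printed here (`pad`, `bos`, `eos`, `msk`) are the ones `infer` relies on below."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# editor's sketch: confirm the vocabulary matches the checkpoint\n",
"assert len(vocab) == 241, len(vocab)\n",
"# special-token ids used by infer() below\n",
"print('pad:', vocab.pad, 'bos:', vocab.bos, 'eos:', vocab.eos, 'msk:', vocab.msk)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},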
{
"cell_type": "code",
"source": [
"def infer(x):\n", | |
" x = x.strip()\n", | |
" x = normalizer(x)\n", | |
" x = tokenizer(x)\n", | |
" x = splitter(x)\n", | |
" x = x.split()\n", | |
" x = [vocab.bos] + [vocab(w) for w in x] + [vocab.eos]\n", | |
" mask_indices = [i for i, w in enumerate(x) if w == vocab.msk]\n", | |
" x = torch.tensor(x)\n", | |
" x = pad([x], padding_value = vocab.pad)\n", | |
" x = Batch(x)\n", | |
" x = x.cuda()\n", | |
" with torch.no_grad():\n", | |
" x = model(x)\n", | |
" x = x.transpose(1, 0)[0]\n", | |
" for i in mask_indices:\n", | |
" prob = torch.softmax(x[i], dim = -1)\n", | |
" values, indices = torch.topk(prob, 10)\n", | |
" tab = [\n", | |
" ['word'] + [vocab[index] for index in indices],\n", | |
" ['prediction (%)'] + ['{:.2f}'.format(value * 100) for value in values]]\n", | |
" tab = tabulate(tab, tablefmt = 'psql')\n", | |
" print('index: {}'.format(i))\n", | |
" print(tab)" | |
],
"metadata": {
"id": "tiEM8SV5kJei"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"You can guess <msk> by running \"infer('sentence')\"" | |
],
"metadata": {
"id": "Iv-BqDcmnbVg"
}
},
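{
"cell_type": "markdown",
"source": [
"The next cell is an editor's sketch (not part of the original gist): `infer_topk` mirrors `infer` but returns the top-k `(word, probability)` pairs per masked position instead of printing a table, which is handier when you want to post-process predictions. It assumes the `model`, `vocab`, `normalizer`, `tokenizer`, `splitter`, and `Batch` objects defined above."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# editor's sketch: a programmatic variant of infer()\n",
"def infer_topk(sentence, k = 10):\n",
"    # same preprocessing pipeline as infer()\n",
"    x = splitter(tokenizer(normalizer(sentence.strip()))).split()\n",
"    x = [vocab.bos] + [vocab(w) for w in x] + [vocab.eos]\n",
"    mask_indices = [i for i, w in enumerate(x) if w == vocab.msk]\n",
"    batch = Batch(pad([torch.tensor(x)], padding_value = vocab.pad)).cuda()\n",
"    with torch.no_grad():\n",
"        out = model(batch).transpose(1, 0)[0]\n",
"    # {mask position: [(word, probability), ...]}\n",
"    return {i: [(vocab[index], value.item())\n",
"                for value, index in zip(*torch.topk(torch.softmax(out[i], dim = -1), k))]\n",
"            for i in mask_indices}\n",
"\n",
"# usage, e.g.: infer_topk('mi pilin akesi <msk> moli.')"
],
"metadata": {},
"execution_count": null,
"outputs": []
},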
{
"cell_type": "code",
"source": [
"infer('mi pilin akesi <msk> moli.')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-aNzVn4FerF8",
"outputId": "4fca95a1-f22a-4b79-f3ef-5da4f5c2de37"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"index: 4\n",
"+----------------+-------+------+------+------+------+------+------+------+------+------+\n",
"| word           | tan   | tawa | li   | jan  | ala  | e    | mi   | sina | lon  | pi   |\n",
"| prediction (%) | 99.84 | 0.09 | 0.03 | 0.01 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |\n",
"+----------------+-------+------+------+------+------+------+------+------+------+------+\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"infer('mi sitelen <msk> lipu.')\n",
"infer('mi sitelen e soweli <msk> lipu.')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SFMbxTmHkfws",
"outputId": "41914179-6401-49b2-c70d-7dc265699698"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"index: 3\n",
"+----------------+-------+------+------+------+---------+------+------+------+------+--------+\n",
"| word           | e     | lon  | insa | tawa | kepeken | tan  | li   | ala  | ma   | palisa |\n",
"| prediction (%) | 96.43 | 3.38 | 0.09 | 0.04 | 0.03    | 0.00 | 0.00 | 0.00 | 0.00 | 0.00   |\n",
"+----------------+-------+------+------+------+---------+------+------+------+------+--------+\n",
"index: 5\n",
"+----------------+-------+------+------+---------+------+------+--------+------+------+------+\n",
"| word           | lon   | tomo | e    | kepeken | tawa | luka | palisa | sona | toki | musi |\n",
"| prediction (%) | 99.32 | 0.28 | 0.14 | 0.09    | 0.03 | 0.03 | 0.02   | 0.02 | 0.01 | 0.01 |\n",
"+----------------+-------+------+------+---------+------+------+--------+------+------+------+\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"infer('toki <msk> li toki <msk>.')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IA5-jaKQl1jr",
"outputId": "7256cd3c-7dc5-4de9-9807-918f7ab8aa44"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"index: 2\n",
"+----------------+-------+------+------+------+------+------+------+------+------+------+\n",
"| word           | pona  | mute | ni   | mi   | ona  | lili | nasa | sina | ala  | mama |\n",
"| prediction (%) | 86.69 | 4.78 | 3.73 | 0.74 | 0.65 | 0.64 | 0.60 | 0.37 | 0.37 | 0.36 |\n",
"+----------------+-------+------+------+------+------+------+------+------+------+------+\n",
"index: 5\n",
"+----------------+-------+-------+------+------+-------+------+------+------+------+------+\n",
"| word           | pona  | ala   | lon  | mute | utala | taso | nasa | wawa | musi | ante |\n",
"| prediction (%) | 59.88 | 18.91 | 7.75 | 3.47 | 1.97  | 1.74 | 1.43 | 1.15 | 0.64 | 0.51 |\n",
"+----------------+-------+-------+------+------+-------+------+------+------+------+------+\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"infer('jan Lase li wile <msk>.')\n",
"infer('jan Lisa li wile <msk>.')\n",
"infer('jan Nanko li wile <msk>.')\n",
"infer('jan Kuli li wile <msk>.')\n",
"infer('jan Kijolo li wile <msk>.')"
],
"metadata": {
"id": "MV9mCl0_nzdI",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "1f0a8213-c682-416b-d0bf-941434812b6f"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"index: 6\n",
"+----------------+-------+-------+---------+------+-------+------+-------+------+------+------+\n",
"| word           | toki  | lape  | sitelen | moku | utala | pali | lukin | tawa | kama | esun |\n",
"| prediction (%) | 31.40 | 18.28 | 11.57   | 9.99 | 9.00  | 8.04 | 2.45  | 1.74 | 1.39 | 1.16 |\n",
"+----------------+-------+-------+---------+------+-------+------+-------+------+------+------+\n",
"index: 6\n",
"+----------------+-------+-------+-------+-------+------+------+------+-------+------+------+\n",
"| word           | unpa  | lape  | moli  | lukin | tawa | esun | moku | utala | kama | pona |\n",
"| prediction (%) | 36.80 | 13.54 | 11.68 | 7.55  | 6.26 | 5.71 | 5.05 | 2.43  | 2.12 | 1.20 |\n",
"+----------------+-------+-------+-------+-------+------+------+------+-------+------+------+\n",
"index: 6\n",
"+----------------+-------+------+---------+------+-------+------+------+------+------+------+\n",
"| word           | lape  | moku | sitelen | pali | lukin | toki | esun | moli | mute | kama |\n",
"| prediction (%) | 75.70 | 6.38 | 2.87    | 2.23 | 2.02  | 1.66 | 1.65 | 1.42 | 0.91 | 0.82 |\n",
"+----------------+-------+------+---------+------+-------+------+------+------+------+------+\n",
"index: 6\n",
"+----------------+-------+-------+------+------+-------+-------+------+------+------+-------+\n",
"| word           | unpa  | lape  | moku | toki | utala | akesi | pali | tawa | esun | lukin |\n",
"| prediction (%) | 78.09 | 12.42 | 1.63 | 1.37 | 1.04  | 0.73  | 0.72 | 0.70 | 0.56 | 0.51  |\n",
"+----------------+-------+-------+------+------+-------+-------+------+------+------+-------+\n",
"index: 7\n",
"+----------------+-------+-------+------+------+-------+------+------+------+------+------+\n",
"| word           | lukin | lape  | awen | moli | utala | ike  | musi | mute | wan  | pona |\n",
"| prediction (%) | 50.91 | 18.49 | 7.88 | 6.90 | 5.71  | 1.59 | 1.21 | 1.21 | 0.74 | 0.72 |\n",
"+----------------+-------+-------+------+------+-------+------+------+------+------+------+\n"
]
}
]
},
{
"cell_type": "code",
"source": [
""
],
"metadata": {
"id": "9FvVB4jV3AKb"
},
"execution_count": 8,
"outputs": []
}
]
} |