-
-
Save 5hv5hvnk/265244cc4b623a74ee85d268b10d661c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"gpuClass": "standard" | |
}, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import nltk\n", | |
"from nltk.corpus import cmudict\n", | |
"from tqdm import tqdm\n", | |
"import pymc as pm" | |
], | |
"metadata": { | |
"id": "NfGis646LvhI" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"nltk.download('cmudict')\n", | |
"cmu_dict = cmudict.dict()\n", | |
"phoneme_dict = {}\n", | |
"for word, pronunciations in cmu_dict.items():\n", | |
" phoneme_dict[word] = [phoneme for pronunciation in pronunciations for phoneme in pronunciation]" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "oaO42gm1LwNZ", | |
"outputId": "7aafc0aa-7520-4ee1-e1e3-2e54dfafe55d" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"[nltk_data] Downloading package cmudict to /root/nltk_data...\n", | |
"[nltk_data] Package cmudict is already up-to-date!\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import sys\n", | |
"sys.setrecursionlimit(100000)" | |
], | |
"metadata": { | |
"id": "vuHqoNVuL5pE" | |
}, | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"with pm.Model() as model:\n", | |
" alpha = np.ones(39)\n", | |
" theta = pm.Dirichlet('theta', a=alpha, shape=39)\n", | |
" for phoneme in range(39):\n", | |
" phoneme_prob = pm.math.prod(theta[phoneme])\n", | |
" pm.Binomial('phoneme_prob_{}'.format(phoneme), n=1, p=phoneme_prob, observed=1)\n", | |
"\n", | |
"with model:\n", | |
" trace = pm.sample(draws=2000, tune=1000, chains=2, cores=1)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 56 | |
}, | |
"id": "ncuQpPz5L2Xr", | |
"outputId": "0620e00f-5311-415a-cb80-67ce83280be0" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"\n", | |
"<style>\n", | |
" /* Turns off some styling */\n", | |
" progress {\n", | |
" /* gets rid of default border in Firefox and Opera. */\n", | |
" border: none;\n", | |
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n", | |
" background-size: auto;\n", | |
" }\n", | |
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n", | |
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n", | |
" }\n", | |
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n", | |
" background: #F44336;\n", | |
" }\n", | |
"</style>\n" | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" <progress value='3000' class='' max='3000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" 100.00% [3000/3000 00:47<00:00 Sampling chain 0, 0 divergences]\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"\n", | |
"<style>\n", | |
" /* Turns off some styling */\n", | |
" progress {\n", | |
" /* gets rid of default border in Firefox and Opera. */\n", | |
" border: none;\n", | |
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n", | |
" background-size: auto;\n", | |
" }\n", | |
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n", | |
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n", | |
" }\n", | |
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n", | |
" background: #F44336;\n", | |
" }\n", | |
"</style>\n" | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"\n", | |
" <div>\n", | |
" <progress value='3000' class='' max='3000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n", | |
" 100.00% [3000/3000 00:44<00:00 Sampling chain 1, 3 divergences]\n", | |
" </div>\n", | |
" " | |
] | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"trace.posterior.theta[:, phoneme]" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 475 | |
}, | |
"id": "B6PsGkJAPgD2", | |
"outputId": "3450dd2c-5393-422b-f312-ab4b1e2d2afa" | |
}, | |
"execution_count": 20, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray 'theta' (chain: 2, theta_dim_0: 39)>\n", | |
"array([[0.08468377, 0.01731509, 0.01993062, 0.01088464, 0.01796477,\n", | |
" 0.04072919, 0.02442778, 0.01402462, 0.02204928, 0.0112055 ,\n", | |
" 0.02012314, 0.02579567, 0.10175219, 0.01705021, 0.02800238,\n", | |
" 0.01737879, 0.02852154, 0.02830964, 0.07086997, 0.00810061,\n", | |
" 0.01055116, 0.00850316, 0.01633823, 0.01982299, 0.02331168,\n", | |
" 0.02123152, 0.01332181, 0.01845547, 0.06057018, 0.02789967,\n", | |
" 0.01663426, 0.01731678, 0.0133699 , 0.03644281, 0.0274727 ,\n", | |
" 0.00659696, 0.03239437, 0.00291172, 0.01773524],\n", | |
" [0.03421254, 0.02013085, 0.0114226 , 0.0293474 , 0.00561449,\n", | |
" 0.02773003, 0.02203436, 0.01714355, 0.0135749 , 0.00134873,\n", | |
" 0.01190287, 0.00472081, 0.02888797, 0.02196991, 0.06368308,\n", | |
" 0.01899323, 0.00493306, 0.04940594, 0.02329974, 0.06127776,\n", | |
" 0.06564107, 0.00694048, 0.00687806, 0.06305144, 0.03059064,\n", | |
" 0.02712587, 0.02322724, 0.00428418, 0.01242838, 0.01857104,\n", | |
" 0.02862752, 0.02607098, 0.03388127, 0.00557591, 0.04088062,\n", | |
" 0.03383657, 0.05129543, 0.02336025, 0.02609922]])\n", | |
"Coordinates:\n", | |
" * chain (chain) int64 0 1\n", | |
" draw int64 38\n", | |
" * theta_dim_0 (theta_dim_0) int64 0 1 2 3 4 5 6 7 ... 31 32 33 34 35 36 37 38" | |
], | |
"text/html": [ | |
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n", | |
"<defs>\n", | |
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n", | |
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n", | |
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n", | |
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n", | |
"</symbol>\n", | |
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n", | |
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n", | |
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n", | |
"</symbol>\n", | |
"</defs>\n", | |
"</svg>\n", | |
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n", | |
" *\n", | |
" */\n", | |
"\n", | |
":root {\n", | |
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n", | |
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n", | |
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n", | |
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n", | |
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n", | |
" --xr-background-color: var(--jp-layout-color0, white);\n", | |
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n", | |
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n", | |
"}\n", | |
"\n", | |
"html[theme=dark],\n", | |
"body[data-theme=dark],\n", | |
"body.vscode-dark {\n", | |
" --xr-font-color0: rgba(255, 255, 255, 1);\n", | |
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n", | |
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n", | |
" --xr-border-color: #1F1F1F;\n", | |
" --xr-disabled-color: #515151;\n", | |
" --xr-background-color: #111111;\n", | |
" --xr-background-color-row-even: #111111;\n", | |
" --xr-background-color-row-odd: #313131;\n", | |
"}\n", | |
"\n", | |
".xr-wrap {\n", | |
" display: block !important;\n", | |
" min-width: 300px;\n", | |
" max-width: 700px;\n", | |
"}\n", | |
"\n", | |
".xr-text-repr-fallback {\n", | |
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-header {\n", | |
" padding-top: 6px;\n", | |
" padding-bottom: 6px;\n", | |
" margin-bottom: 4px;\n", | |
" border-bottom: solid 1px var(--xr-border-color);\n", | |
"}\n", | |
"\n", | |
".xr-header > div,\n", | |
".xr-header > ul {\n", | |
" display: inline;\n", | |
" margin-top: 0;\n", | |
" margin-bottom: 0;\n", | |
"}\n", | |
"\n", | |
".xr-obj-type,\n", | |
".xr-array-name {\n", | |
" margin-left: 2px;\n", | |
" margin-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-obj-type {\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-sections {\n", | |
" padding-left: 0 !important;\n", | |
" display: grid;\n", | |
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n", | |
"}\n", | |
"\n", | |
".xr-section-item {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-section-item input {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-section-item input + label {\n", | |
" color: var(--xr-disabled-color);\n", | |
"}\n", | |
"\n", | |
".xr-section-item input:enabled + label {\n", | |
" cursor: pointer;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-section-item input:enabled + label:hover {\n", | |
" color: var(--xr-font-color0);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary {\n", | |
" grid-column: 1;\n", | |
" color: var(--xr-font-color2);\n", | |
" font-weight: 500;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary > span {\n", | |
" display: inline-block;\n", | |
" padding-left: 0.5em;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:disabled + label {\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in + label:before {\n", | |
" display: inline-block;\n", | |
" content: '►';\n", | |
" font-size: 11px;\n", | |
" width: 15px;\n", | |
" text-align: center;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:disabled + label:before {\n", | |
" color: var(--xr-disabled-color);\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked + label:before {\n", | |
" content: '▼';\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked + label > span {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary,\n", | |
".xr-section-inline-details {\n", | |
" padding-top: 4px;\n", | |
" padding-bottom: 4px;\n", | |
"}\n", | |
"\n", | |
".xr-section-inline-details {\n", | |
" grid-column: 2 / -1;\n", | |
"}\n", | |
"\n", | |
".xr-section-details {\n", | |
" display: none;\n", | |
" grid-column: 1 / -1;\n", | |
" margin-bottom: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-section-summary-in:checked ~ .xr-section-details {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-array-wrap {\n", | |
" grid-column: 1 / -1;\n", | |
" display: grid;\n", | |
" grid-template-columns: 20px auto;\n", | |
"}\n", | |
"\n", | |
".xr-array-wrap > label {\n", | |
" grid-column: 1;\n", | |
" vertical-align: top;\n", | |
"}\n", | |
"\n", | |
".xr-preview {\n", | |
" color: var(--xr-font-color3);\n", | |
"}\n", | |
"\n", | |
".xr-array-preview,\n", | |
".xr-array-data {\n", | |
" padding: 0 5px !important;\n", | |
" grid-column: 2;\n", | |
"}\n", | |
"\n", | |
".xr-array-data,\n", | |
".xr-array-in:checked ~ .xr-array-preview {\n", | |
" display: none;\n", | |
"}\n", | |
"\n", | |
".xr-array-in:checked ~ .xr-array-data,\n", | |
".xr-array-preview {\n", | |
" display: inline-block;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list {\n", | |
" display: inline-block !important;\n", | |
" list-style: none;\n", | |
" padding: 0 !important;\n", | |
" margin: 0;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list li {\n", | |
" display: inline-block;\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
"}\n", | |
"\n", | |
".xr-dim-list:before {\n", | |
" content: '(';\n", | |
"}\n", | |
"\n", | |
".xr-dim-list:after {\n", | |
" content: ')';\n", | |
"}\n", | |
"\n", | |
".xr-dim-list li:not(:last-child):after {\n", | |
" content: ',';\n", | |
" padding-right: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-has-index {\n", | |
" font-weight: bold;\n", | |
"}\n", | |
"\n", | |
".xr-var-list,\n", | |
".xr-var-item {\n", | |
" display: contents;\n", | |
"}\n", | |
"\n", | |
".xr-var-item > div,\n", | |
".xr-var-item label,\n", | |
".xr-var-item > .xr-var-name span {\n", | |
" background-color: var(--xr-background-color-row-even);\n", | |
" margin-bottom: 0;\n", | |
"}\n", | |
"\n", | |
".xr-var-item > .xr-var-name:hover span {\n", | |
" padding-right: 5px;\n", | |
"}\n", | |
"\n", | |
".xr-var-list > li:nth-child(odd) > div,\n", | |
".xr-var-list > li:nth-child(odd) > label,\n", | |
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n", | |
" background-color: var(--xr-background-color-row-odd);\n", | |
"}\n", | |
"\n", | |
".xr-var-name {\n", | |
" grid-column: 1;\n", | |
"}\n", | |
"\n", | |
".xr-var-dims {\n", | |
" grid-column: 2;\n", | |
"}\n", | |
"\n", | |
".xr-var-dtype {\n", | |
" grid-column: 3;\n", | |
" text-align: right;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-var-preview {\n", | |
" grid-column: 4;\n", | |
"}\n", | |
"\n", | |
".xr-index-preview {\n", | |
" grid-column: 2 / 5;\n", | |
" color: var(--xr-font-color2);\n", | |
"}\n", | |
"\n", | |
".xr-var-name,\n", | |
".xr-var-dims,\n", | |
".xr-var-dtype,\n", | |
".xr-preview,\n", | |
".xr-attrs dt {\n", | |
" white-space: nowrap;\n", | |
" overflow: hidden;\n", | |
" text-overflow: ellipsis;\n", | |
" padding-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-var-name:hover,\n", | |
".xr-var-dims:hover,\n", | |
".xr-var-dtype:hover,\n", | |
".xr-attrs dt:hover {\n", | |
" overflow: visible;\n", | |
" width: auto;\n", | |
" z-index: 1;\n", | |
"}\n", | |
"\n", | |
".xr-var-attrs,\n", | |
".xr-var-data,\n", | |
".xr-index-data {\n", | |
" display: none;\n", | |
" background-color: var(--xr-background-color) !important;\n", | |
" padding-bottom: 5px !important;\n", | |
"}\n", | |
"\n", | |
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n", | |
".xr-var-data-in:checked ~ .xr-var-data,\n", | |
".xr-index-data-in:checked ~ .xr-index-data {\n", | |
" display: block;\n", | |
"}\n", | |
"\n", | |
".xr-var-data > table {\n", | |
" float: right;\n", | |
"}\n", | |
"\n", | |
".xr-var-name span,\n", | |
".xr-var-data,\n", | |
".xr-index-name div,\n", | |
".xr-index-data,\n", | |
".xr-attrs {\n", | |
" padding-left: 25px !important;\n", | |
"}\n", | |
"\n", | |
".xr-attrs,\n", | |
".xr-var-attrs,\n", | |
".xr-var-data,\n", | |
".xr-index-data {\n", | |
" grid-column: 1 / -1;\n", | |
"}\n", | |
"\n", | |
"dl.xr-attrs {\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
" display: grid;\n", | |
" grid-template-columns: 125px auto;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt,\n", | |
".xr-attrs dd {\n", | |
" padding: 0;\n", | |
" margin: 0;\n", | |
" float: left;\n", | |
" padding-right: 10px;\n", | |
" width: auto;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt {\n", | |
" font-weight: normal;\n", | |
" grid-column: 1;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dt:hover span {\n", | |
" display: inline-block;\n", | |
" background: var(--xr-background-color);\n", | |
" padding-right: 10px;\n", | |
"}\n", | |
"\n", | |
".xr-attrs dd {\n", | |
" grid-column: 2;\n", | |
" white-space: pre-wrap;\n", | |
" word-break: break-all;\n", | |
"}\n", | |
"\n", | |
".xr-icon-database,\n", | |
".xr-icon-file-text2,\n", | |
".xr-no-icon {\n", | |
" display: inline-block;\n", | |
" vertical-align: middle;\n", | |
" width: 1em;\n", | |
" height: 1.5em !important;\n", | |
" stroke-width: 0;\n", | |
" stroke: currentColor;\n", | |
" fill: currentColor;\n", | |
"}\n", | |
"</style><pre class='xr-text-repr-fallback'><xarray.DataArray 'theta' (chain: 2, theta_dim_0: 39)>\n", | |
"array([[0.08468377, 0.01731509, 0.01993062, 0.01088464, 0.01796477,\n", | |
" 0.04072919, 0.02442778, 0.01402462, 0.02204928, 0.0112055 ,\n", | |
" 0.02012314, 0.02579567, 0.10175219, 0.01705021, 0.02800238,\n", | |
" 0.01737879, 0.02852154, 0.02830964, 0.07086997, 0.00810061,\n", | |
" 0.01055116, 0.00850316, 0.01633823, 0.01982299, 0.02331168,\n", | |
" 0.02123152, 0.01332181, 0.01845547, 0.06057018, 0.02789967,\n", | |
" 0.01663426, 0.01731678, 0.0133699 , 0.03644281, 0.0274727 ,\n", | |
" 0.00659696, 0.03239437, 0.00291172, 0.01773524],\n", | |
" [0.03421254, 0.02013085, 0.0114226 , 0.0293474 , 0.00561449,\n", | |
" 0.02773003, 0.02203436, 0.01714355, 0.0135749 , 0.00134873,\n", | |
" 0.01190287, 0.00472081, 0.02888797, 0.02196991, 0.06368308,\n", | |
" 0.01899323, 0.00493306, 0.04940594, 0.02329974, 0.06127776,\n", | |
" 0.06564107, 0.00694048, 0.00687806, 0.06305144, 0.03059064,\n", | |
" 0.02712587, 0.02322724, 0.00428418, 0.01242838, 0.01857104,\n", | |
" 0.02862752, 0.02607098, 0.03388127, 0.00557591, 0.04088062,\n", | |
" 0.03383657, 0.05129543, 0.02336025, 0.02609922]])\n", | |
"Coordinates:\n", | |
" * chain (chain) int64 0 1\n", | |
" draw int64 38\n", | |
" * theta_dim_0 (theta_dim_0) int64 0 1 2 3 4 5 6 7 ... 31 32 33 34 35 36 37 38</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.DataArray</div><div class='xr-array-name'>'theta'</div><ul class='xr-dim-list'><li><span class='xr-has-index'>chain</span>: 2</li><li><span class='xr-has-index'>theta_dim_0</span>: 39</li></ul></div><ul class='xr-sections'><li class='xr-section-item'><div class='xr-array-wrap'><input id='section-a76fa0a6-6588-4850-9685-7cb2b612016f' class='xr-array-in' type='checkbox' checked><label for='section-a76fa0a6-6588-4850-9685-7cb2b612016f' title='Show/hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-array-preview xr-preview'><span>0.08468 0.01732 0.01993 0.01088 ... 0.03384 0.0513 0.02336 0.0261</span></div><div class='xr-array-data'><pre>array([[0.08468377, 0.01731509, 0.01993062, 0.01088464, 0.01796477,\n", | |
" 0.04072919, 0.02442778, 0.01402462, 0.02204928, 0.0112055 ,\n", | |
" 0.02012314, 0.02579567, 0.10175219, 0.01705021, 0.02800238,\n", | |
" 0.01737879, 0.02852154, 0.02830964, 0.07086997, 0.00810061,\n", | |
" 0.01055116, 0.00850316, 0.01633823, 0.01982299, 0.02331168,\n", | |
" 0.02123152, 0.01332181, 0.01845547, 0.06057018, 0.02789967,\n", | |
" 0.01663426, 0.01731678, 0.0133699 , 0.03644281, 0.0274727 ,\n", | |
" 0.00659696, 0.03239437, 0.00291172, 0.01773524],\n", | |
" [0.03421254, 0.02013085, 0.0114226 , 0.0293474 , 0.00561449,\n", | |
" 0.02773003, 0.02203436, 0.01714355, 0.0135749 , 0.00134873,\n", | |
" 0.01190287, 0.00472081, 0.02888797, 0.02196991, 0.06368308,\n", | |
" 0.01899323, 0.00493306, 0.04940594, 0.02329974, 0.06127776,\n", | |
" 0.06564107, 0.00694048, 0.00687806, 0.06305144, 0.03059064,\n", | |
" 0.02712587, 0.02322724, 0.00428418, 0.01242838, 0.01857104,\n", | |
" 0.02862752, 0.02607098, 0.03388127, 0.00557591, 0.04088062,\n", | |
" 0.03383657, 0.05129543, 0.02336025, 0.02609922]])</pre></div></div></li><li class='xr-section-item'><input id='section-1c2d17cc-0193-46a9-8a8e-dc07c454bac2' class='xr-section-summary-in' type='checkbox' checked><label for='section-1c2d17cc-0193-46a9-8a8e-dc07c454bac2' class='xr-section-summary' >Coordinates: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>chain</span></div><div class='xr-var-dims'>(chain)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>0 1</div><input id='attrs-291aa086-cc2f-400c-9713-88d9905dbb82' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-291aa086-cc2f-400c-9713-88d9905dbb82' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-897cafb4-cca5-482f-a11d-7d56f0518278' class='xr-var-data-in' type='checkbox'><label for='data-897cafb4-cca5-482f-a11d-7d56f0518278' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([0, 1])</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>draw</span></div><div class='xr-var-dims'>()</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>38</div><input id='attrs-8acdfef1-9948-4cfe-b3e9-20333bbe6740' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-8acdfef1-9948-4cfe-b3e9-20333bbe6740' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-7128c9fc-d03c-4b71-9bc8-733cffbef142' class='xr-var-data-in' type='checkbox'><label for='data-7128c9fc-d03c-4b71-9bc8-733cffbef142' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array(38)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>theta_dim_0</span></div><div class='xr-var-dims'>(theta_dim_0)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>0 1 2 3 4 5 6 ... 33 34 35 36 37 38</div><input id='attrs-5856a7e6-0f5e-41bb-9f89-bf74ede30176' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-5856a7e6-0f5e-41bb-9f89-bf74ede30176' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-60c9c8f2-48fa-4820-a524-58d5b3d91d1f' class='xr-var-data-in' type='checkbox'><label for='data-60c9c8f2-48fa-4820-a524-58d5b3d91d1f' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n", | |
" 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n", | |
" 36, 37, 38])</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-0d210ab2-fa65-439f-8ac6-8a79e3aed240' class='xr-section-summary-in' type='checkbox' ><label for='section-0d210ab2-fa65-439f-8ac6-8a79e3aed240' class='xr-section-summary' >Indexes: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>chain</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-bfefa97e-3872-42df-8e10-f22e8b563e67' class='xr-index-data-in' type='checkbox'/><label for='index-bfefa97e-3872-42df-8e10-f22e8b563e67' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Int64Index([0, 1], dtype='int64', name='chain'))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>theta_dim_0</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-5a7b23f4-319c-4cf0-8a53-4e0af8d2965a' class='xr-index-data-in' type='checkbox'/><label for='index-5a7b23f4-319c-4cf0-8a53-4e0af8d2965a' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", | |
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", | |
" 34, 35, 36, 37, 38],\n", | |
" dtype='int64', name='theta_dim_0'))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-52c9221b-7e6c-42ad-86a8-d6be67dc14ec' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-52c9221b-7e6c-42ad-86a8-d6be67dc14ec' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 20 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"phoneme_set = set(phoneme for phoneme_seq in phoneme_dict.values() for phoneme in phoneme_seq)\n", | |
"phoneme_indices = {phoneme: i for i, phoneme in enumerate(phoneme_set)}\n", | |
"\n", | |
"# Convert phonemes to integer indices\n", | |
"phoneme_dict_int = {\n", | |
" word: [phoneme_indices[phoneme] for phoneme in phonemes]\n", | |
" for word, phonemes in phoneme_dict.items()\n", | |
"}\n", | |
"for word, phonemes in phoneme_dict_int.items():\n", | |
" phoneme_probs[word] = np.mean([np.prod(trace.posterior.theta[:, phoneme]) for phoneme in phonemes])\n" | |
], | |
"metadata": { | |
"id": "vxhpGspiQwkA" | |
}, | |
"execution_count": 27, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def grade_paragraph(paragraph):\n", | |
" words = paragraph.split()\n", | |
" word_probs = [phoneme_probs[word] for word in words if word in phoneme_probs]\n", | |
" print(word_probs)\n", | |
" if len(word_probs) > 0:\n", | |
" complexity_score = np.mean(word_probs)\n", | |
" else:\n", | |
" complexity_score = 0\n", | |
" return complexity_score\n", | |
"\n", | |
"with open('word_complexity.csv', 'w') as f:\n", | |
" for word, complexity_score in phoneme_probs.items():\n", | |
" f.write('{},{}\\n'.format(word, complexity_score))" | |
], | |
"metadata": { | |
"id": "pU10m7x2NBiJ" | |
}, | |
"execution_count": 37, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"paragraph = \"There is a well in the village. There is also a pond. Cows, bulls, buffalos, horses, donkeys, dogs and goats drink water from the pond. They sometimes have a bath in it. You can also see many cats in the lanes. They like to drink milk and chase mice.\"" | |
], | |
"metadata": { | |
"id": "OYOYTsOPUnli" | |
}, | |
"execution_count": 38, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"grade_paragraph(paragraph)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "qGCSHr59NEsm", | |
"outputId": "52595904-d16c-48f8-8588-d5d431079a56" | |
}, | |
"execution_count": 39, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"[9.2845983573296e-133, 2.7186232337646675e-132, 1.0032740236021434e-135, 8.339816066898933e-133, 9.499390730469265e-133, 9.2845983573296e-133, 1.6760150427710444e-134, 2.7186232337646675e-132, 7.821208648712397e-134, 1.1861918875973315e-132, 6.88583332191121e-133, 1.2410362436430073e-133, 7.796415550600236e-133, 1.0912998969335796e-133, 9.499390730469265e-133, 8.06716725212648e-133, 7.618929031308053e-133, 2.7186232337646675e-132, 8.792881812051788e-133, 8.339816066898933e-133, 1.3326000092015514e-132, 1.6760150427710444e-134, 6.112587703035012e-134, 4.917774930995188e-134, 1.2509511887670658e-132, 8.339816066898933e-133, 9.499390730469265e-133, 1.6561120020092671e-133, 2.7796759547796124e-132, 1.2410362436430073e-133, 1.4304785147703589e-133, 1.1861918875973315e-132, 8.3227202828127e-134]\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"8.481378126468953e-133" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 39 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"grade_paragraph(\"buffalos\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "LFnoNShSUxBM", | |
"outputId": "64baa1c8-fb4c-4b56-fc0a-1560f9ceb788" | |
}, | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"9.212103441134748e-133" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 36 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"grade_paragraph(\"village\")" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "REIIAVX9U8vi", | |
"outputId": "0ce794b3-1ec9-4774-f332-89f4208629ea" | |
}, | |
"execution_count": 34, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"1.1145970216057637e-132" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 34 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "RVkdQIJzVDfe" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment