Skip to content

Instantly share code, notes, and snippets.

@5hv5hvnk
Created March 16, 2023 21:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 5hv5hvnk/265244cc4b623a74ee85d268b10d661c to your computer and use it in GitHub Desktop.
Save 5hv5hvnk/265244cc4b623a74ee85d268b10d661c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import nltk\n",
"from nltk.corpus import cmudict\n",
"from tqdm import tqdm\n",
"import pymc as pm"
],
"metadata": {
"id": "NfGis646LvhI"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"nltk.download('cmudict')\n",
"cmu_dict = cmudict.dict()\n",
"phoneme_dict = {}\n",
"for word, pronunciations in cmu_dict.items():\n",
" phoneme_dict[word] = [phoneme for pronunciation in pronunciations for phoneme in pronunciation]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oaO42gm1LwNZ",
"outputId": "7aafc0aa-7520-4ee1-e1e3-2e54dfafe55d"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"[nltk_data] Downloading package cmudict to /root/nltk_data...\n",
"[nltk_data] Package cmudict is already up-to-date!\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import sys\n",
"sys.setrecursionlimit(100000)"
],
"metadata": {
"id": "vuHqoNVuL5pE"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"with pm.Model() as model:\n",
" alpha = np.ones(39)\n",
" theta = pm.Dirichlet('theta', a=alpha, shape=39)\n",
" for phoneme in range(39):\n",
" phoneme_prob = pm.math.prod(theta[phoneme])\n",
" pm.Binomial('phoneme_prob_{}'.format(phoneme), n=1, p=phoneme_prob, observed=1)\n",
"\n",
"with model:\n",
" trace = pm.sample(draws=2000, tune=1000, chains=2, cores=1)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 56
},
"id": "ncuQpPz5L2Xr",
"outputId": "0620e00f-5311-415a-cb80-67ce83280be0"
},
"execution_count": 5,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" <progress value='3000' class='' max='3000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 100.00% [3000/3000 00:47&lt;00:00 Sampling chain 0, 0 divergences]\n",
" </div>\n",
" "
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
"<style>\n",
" /* Turns off some styling */\n",
" progress {\n",
" /* gets rid of default border in Firefox and Opera. */\n",
" border: none;\n",
" /* Needs to be in here for Safari polyfill so background images work as expected. */\n",
" background-size: auto;\n",
" }\n",
" progress:not([value]), progress:not([value])::-webkit-progress-bar {\n",
" background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n",
" }\n",
" .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n",
" background: #F44336;\n",
" }\n",
"</style>\n"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.HTML object>"
],
"text/html": [
"\n",
" <div>\n",
" <progress value='3000' class='' max='3000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" 100.00% [3000/3000 00:44&lt;00:00 Sampling chain 1, 3 divergences]\n",
" </div>\n",
" "
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"trace.posterior.theta[:, phoneme]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 475
},
"id": "B6PsGkJAPgD2",
"outputId": "3450dd2c-5393-422b-f312-ab4b1e2d2afa"
},
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<xarray.DataArray 'theta' (chain: 2, theta_dim_0: 39)>\n",
"array([[0.08468377, 0.01731509, 0.01993062, 0.01088464, 0.01796477,\n",
" 0.04072919, 0.02442778, 0.01402462, 0.02204928, 0.0112055 ,\n",
" 0.02012314, 0.02579567, 0.10175219, 0.01705021, 0.02800238,\n",
" 0.01737879, 0.02852154, 0.02830964, 0.07086997, 0.00810061,\n",
" 0.01055116, 0.00850316, 0.01633823, 0.01982299, 0.02331168,\n",
" 0.02123152, 0.01332181, 0.01845547, 0.06057018, 0.02789967,\n",
" 0.01663426, 0.01731678, 0.0133699 , 0.03644281, 0.0274727 ,\n",
" 0.00659696, 0.03239437, 0.00291172, 0.01773524],\n",
" [0.03421254, 0.02013085, 0.0114226 , 0.0293474 , 0.00561449,\n",
" 0.02773003, 0.02203436, 0.01714355, 0.0135749 , 0.00134873,\n",
" 0.01190287, 0.00472081, 0.02888797, 0.02196991, 0.06368308,\n",
" 0.01899323, 0.00493306, 0.04940594, 0.02329974, 0.06127776,\n",
" 0.06564107, 0.00694048, 0.00687806, 0.06305144, 0.03059064,\n",
" 0.02712587, 0.02322724, 0.00428418, 0.01242838, 0.01857104,\n",
" 0.02862752, 0.02607098, 0.03388127, 0.00557591, 0.04088062,\n",
" 0.03383657, 0.05129543, 0.02336025, 0.02609922]])\n",
"Coordinates:\n",
" * chain (chain) int64 0 1\n",
" draw int64 38\n",
" * theta_dim_0 (theta_dim_0) int64 0 1 2 3 4 5 6 7 ... 31 32 33 34 35 36 37 38"
],
"text/html": [
"<div><svg style=\"position: absolute; width: 0; height: 0; overflow: hidden\">\n",
"<defs>\n",
"<symbol id=\"icon-database\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M16 0c-8.837 0-16 2.239-16 5v4c0 2.761 7.163 5 16 5s16-2.239 16-5v-4c0-2.761-7.163-5-16-5z\"></path>\n",
"<path d=\"M16 17c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"<path d=\"M16 26c-8.837 0-16-2.239-16-5v6c0 2.761 7.163 5 16 5s16-2.239 16-5v-6c0 2.761-7.163 5-16 5z\"></path>\n",
"</symbol>\n",
"<symbol id=\"icon-file-text2\" viewBox=\"0 0 32 32\">\n",
"<path d=\"M28.681 7.159c-0.694-0.947-1.662-2.053-2.724-3.116s-2.169-2.030-3.116-2.724c-1.612-1.182-2.393-1.319-2.841-1.319h-15.5c-1.378 0-2.5 1.121-2.5 2.5v27c0 1.378 1.122 2.5 2.5 2.5h23c1.378 0 2.5-1.122 2.5-2.5v-19.5c0-0.448-0.137-1.23-1.319-2.841zM24.543 5.457c0.959 0.959 1.712 1.825 2.268 2.543h-4.811v-4.811c0.718 0.556 1.584 1.309 2.543 2.268zM28 29.5c0 0.271-0.229 0.5-0.5 0.5h-23c-0.271 0-0.5-0.229-0.5-0.5v-27c0-0.271 0.229-0.5 0.5-0.5 0 0 15.499-0 15.5 0v7c0 0.552 0.448 1 1 1h7v19.5z\"></path>\n",
"<path d=\"M23 26h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 22h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"<path d=\"M23 18h-14c-0.552 0-1-0.448-1-1s0.448-1 1-1h14c0.552 0 1 0.448 1 1s-0.448 1-1 1z\"></path>\n",
"</symbol>\n",
"</defs>\n",
"</svg>\n",
"<style>/* CSS stylesheet for displaying xarray objects in jupyterlab.\n",
" *\n",
" */\n",
"\n",
":root {\n",
" --xr-font-color0: var(--jp-content-font-color0, rgba(0, 0, 0, 1));\n",
" --xr-font-color2: var(--jp-content-font-color2, rgba(0, 0, 0, 0.54));\n",
" --xr-font-color3: var(--jp-content-font-color3, rgba(0, 0, 0, 0.38));\n",
" --xr-border-color: var(--jp-border-color2, #e0e0e0);\n",
" --xr-disabled-color: var(--jp-layout-color3, #bdbdbd);\n",
" --xr-background-color: var(--jp-layout-color0, white);\n",
" --xr-background-color-row-even: var(--jp-layout-color1, white);\n",
" --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee);\n",
"}\n",
"\n",
"html[theme=dark],\n",
"body[data-theme=dark],\n",
"body.vscode-dark {\n",
" --xr-font-color0: rgba(255, 255, 255, 1);\n",
" --xr-font-color2: rgba(255, 255, 255, 0.54);\n",
" --xr-font-color3: rgba(255, 255, 255, 0.38);\n",
" --xr-border-color: #1F1F1F;\n",
" --xr-disabled-color: #515151;\n",
" --xr-background-color: #111111;\n",
" --xr-background-color-row-even: #111111;\n",
" --xr-background-color-row-odd: #313131;\n",
"}\n",
"\n",
".xr-wrap {\n",
" display: block !important;\n",
" min-width: 300px;\n",
" max-width: 700px;\n",
"}\n",
"\n",
".xr-text-repr-fallback {\n",
" /* fallback to plain text repr when CSS is not injected (untrusted notebook) */\n",
" display: none;\n",
"}\n",
"\n",
".xr-header {\n",
" padding-top: 6px;\n",
" padding-bottom: 6px;\n",
" margin-bottom: 4px;\n",
" border-bottom: solid 1px var(--xr-border-color);\n",
"}\n",
"\n",
".xr-header > div,\n",
".xr-header > ul {\n",
" display: inline;\n",
" margin-top: 0;\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-obj-type,\n",
".xr-array-name {\n",
" margin-left: 2px;\n",
" margin-right: 10px;\n",
"}\n",
"\n",
".xr-obj-type {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-sections {\n",
" padding-left: 0 !important;\n",
" display: grid;\n",
" grid-template-columns: 150px auto auto 1fr 20px 20px;\n",
"}\n",
"\n",
".xr-section-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-section-item input {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-item input + label {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label {\n",
" cursor: pointer;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-item input:enabled + label:hover {\n",
" color: var(--xr-font-color0);\n",
"}\n",
"\n",
".xr-section-summary {\n",
" grid-column: 1;\n",
" color: var(--xr-font-color2);\n",
" font-weight: 500;\n",
"}\n",
"\n",
".xr-section-summary > span {\n",
" display: inline-block;\n",
" padding-left: 0.5em;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label {\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-section-summary-in + label:before {\n",
" display: inline-block;\n",
" content: '►';\n",
" font-size: 11px;\n",
" width: 15px;\n",
" text-align: center;\n",
"}\n",
"\n",
".xr-section-summary-in:disabled + label:before {\n",
" color: var(--xr-disabled-color);\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label:before {\n",
" content: '▼';\n",
"}\n",
"\n",
".xr-section-summary-in:checked + label > span {\n",
" display: none;\n",
"}\n",
"\n",
".xr-section-summary,\n",
".xr-section-inline-details {\n",
" padding-top: 4px;\n",
" padding-bottom: 4px;\n",
"}\n",
"\n",
".xr-section-inline-details {\n",
" grid-column: 2 / -1;\n",
"}\n",
"\n",
".xr-section-details {\n",
" display: none;\n",
" grid-column: 1 / -1;\n",
" margin-bottom: 5px;\n",
"}\n",
"\n",
".xr-section-summary-in:checked ~ .xr-section-details {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-array-wrap {\n",
" grid-column: 1 / -1;\n",
" display: grid;\n",
" grid-template-columns: 20px auto;\n",
"}\n",
"\n",
".xr-array-wrap > label {\n",
" grid-column: 1;\n",
" vertical-align: top;\n",
"}\n",
"\n",
".xr-preview {\n",
" color: var(--xr-font-color3);\n",
"}\n",
"\n",
".xr-array-preview,\n",
".xr-array-data {\n",
" padding: 0 5px !important;\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-array-data,\n",
".xr-array-in:checked ~ .xr-array-preview {\n",
" display: none;\n",
"}\n",
"\n",
".xr-array-in:checked ~ .xr-array-data,\n",
".xr-array-preview {\n",
" display: inline-block;\n",
"}\n",
"\n",
".xr-dim-list {\n",
" display: inline-block !important;\n",
" list-style: none;\n",
" padding: 0 !important;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list li {\n",
" display: inline-block;\n",
" padding: 0;\n",
" margin: 0;\n",
"}\n",
"\n",
".xr-dim-list:before {\n",
" content: '(';\n",
"}\n",
"\n",
".xr-dim-list:after {\n",
" content: ')';\n",
"}\n",
"\n",
".xr-dim-list li:not(:last-child):after {\n",
" content: ',';\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-has-index {\n",
" font-weight: bold;\n",
"}\n",
"\n",
".xr-var-list,\n",
".xr-var-item {\n",
" display: contents;\n",
"}\n",
"\n",
".xr-var-item > div,\n",
".xr-var-item label,\n",
".xr-var-item > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-even);\n",
" margin-bottom: 0;\n",
"}\n",
"\n",
".xr-var-item > .xr-var-name:hover span {\n",
" padding-right: 5px;\n",
"}\n",
"\n",
".xr-var-list > li:nth-child(odd) > div,\n",
".xr-var-list > li:nth-child(odd) > label,\n",
".xr-var-list > li:nth-child(odd) > .xr-var-name span {\n",
" background-color: var(--xr-background-color-row-odd);\n",
"}\n",
"\n",
".xr-var-name {\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-var-dims {\n",
" grid-column: 2;\n",
"}\n",
"\n",
".xr-var-dtype {\n",
" grid-column: 3;\n",
" text-align: right;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-preview {\n",
" grid-column: 4;\n",
"}\n",
"\n",
".xr-index-preview {\n",
" grid-column: 2 / 5;\n",
" color: var(--xr-font-color2);\n",
"}\n",
"\n",
".xr-var-name,\n",
".xr-var-dims,\n",
".xr-var-dtype,\n",
".xr-preview,\n",
".xr-attrs dt {\n",
" white-space: nowrap;\n",
" overflow: hidden;\n",
" text-overflow: ellipsis;\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-var-name:hover,\n",
".xr-var-dims:hover,\n",
".xr-var-dtype:hover,\n",
".xr-attrs dt:hover {\n",
" overflow: visible;\n",
" width: auto;\n",
" z-index: 1;\n",
"}\n",
"\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" display: none;\n",
" background-color: var(--xr-background-color) !important;\n",
" padding-bottom: 5px !important;\n",
"}\n",
"\n",
".xr-var-attrs-in:checked ~ .xr-var-attrs,\n",
".xr-var-data-in:checked ~ .xr-var-data,\n",
".xr-index-data-in:checked ~ .xr-index-data {\n",
" display: block;\n",
"}\n",
"\n",
".xr-var-data > table {\n",
" float: right;\n",
"}\n",
"\n",
".xr-var-name span,\n",
".xr-var-data,\n",
".xr-index-name div,\n",
".xr-index-data,\n",
".xr-attrs {\n",
" padding-left: 25px !important;\n",
"}\n",
"\n",
".xr-attrs,\n",
".xr-var-attrs,\n",
".xr-var-data,\n",
".xr-index-data {\n",
" grid-column: 1 / -1;\n",
"}\n",
"\n",
"dl.xr-attrs {\n",
" padding: 0;\n",
" margin: 0;\n",
" display: grid;\n",
" grid-template-columns: 125px auto;\n",
"}\n",
"\n",
".xr-attrs dt,\n",
".xr-attrs dd {\n",
" padding: 0;\n",
" margin: 0;\n",
" float: left;\n",
" padding-right: 10px;\n",
" width: auto;\n",
"}\n",
"\n",
".xr-attrs dt {\n",
" font-weight: normal;\n",
" grid-column: 1;\n",
"}\n",
"\n",
".xr-attrs dt:hover span {\n",
" display: inline-block;\n",
" background: var(--xr-background-color);\n",
" padding-right: 10px;\n",
"}\n",
"\n",
".xr-attrs dd {\n",
" grid-column: 2;\n",
" white-space: pre-wrap;\n",
" word-break: break-all;\n",
"}\n",
"\n",
".xr-icon-database,\n",
".xr-icon-file-text2,\n",
".xr-no-icon {\n",
" display: inline-block;\n",
" vertical-align: middle;\n",
" width: 1em;\n",
" height: 1.5em !important;\n",
" stroke-width: 0;\n",
" stroke: currentColor;\n",
" fill: currentColor;\n",
"}\n",
"</style><pre class='xr-text-repr-fallback'>&lt;xarray.DataArray &#x27;theta&#x27; (chain: 2, theta_dim_0: 39)&gt;\n",
"array([[0.08468377, 0.01731509, 0.01993062, 0.01088464, 0.01796477,\n",
" 0.04072919, 0.02442778, 0.01402462, 0.02204928, 0.0112055 ,\n",
" 0.02012314, 0.02579567, 0.10175219, 0.01705021, 0.02800238,\n",
" 0.01737879, 0.02852154, 0.02830964, 0.07086997, 0.00810061,\n",
" 0.01055116, 0.00850316, 0.01633823, 0.01982299, 0.02331168,\n",
" 0.02123152, 0.01332181, 0.01845547, 0.06057018, 0.02789967,\n",
" 0.01663426, 0.01731678, 0.0133699 , 0.03644281, 0.0274727 ,\n",
" 0.00659696, 0.03239437, 0.00291172, 0.01773524],\n",
" [0.03421254, 0.02013085, 0.0114226 , 0.0293474 , 0.00561449,\n",
" 0.02773003, 0.02203436, 0.01714355, 0.0135749 , 0.00134873,\n",
" 0.01190287, 0.00472081, 0.02888797, 0.02196991, 0.06368308,\n",
" 0.01899323, 0.00493306, 0.04940594, 0.02329974, 0.06127776,\n",
" 0.06564107, 0.00694048, 0.00687806, 0.06305144, 0.03059064,\n",
" 0.02712587, 0.02322724, 0.00428418, 0.01242838, 0.01857104,\n",
" 0.02862752, 0.02607098, 0.03388127, 0.00557591, 0.04088062,\n",
" 0.03383657, 0.05129543, 0.02336025, 0.02609922]])\n",
"Coordinates:\n",
" * chain (chain) int64 0 1\n",
" draw int64 38\n",
" * theta_dim_0 (theta_dim_0) int64 0 1 2 3 4 5 6 7 ... 31 32 33 34 35 36 37 38</pre><div class='xr-wrap' style='display:none'><div class='xr-header'><div class='xr-obj-type'>xarray.DataArray</div><div class='xr-array-name'>'theta'</div><ul class='xr-dim-list'><li><span class='xr-has-index'>chain</span>: 2</li><li><span class='xr-has-index'>theta_dim_0</span>: 39</li></ul></div><ul class='xr-sections'><li class='xr-section-item'><div class='xr-array-wrap'><input id='section-a76fa0a6-6588-4850-9685-7cb2b612016f' class='xr-array-in' type='checkbox' checked><label for='section-a76fa0a6-6588-4850-9685-7cb2b612016f' title='Show/hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-array-preview xr-preview'><span>0.08468 0.01732 0.01993 0.01088 ... 0.03384 0.0513 0.02336 0.0261</span></div><div class='xr-array-data'><pre>array([[0.08468377, 0.01731509, 0.01993062, 0.01088464, 0.01796477,\n",
" 0.04072919, 0.02442778, 0.01402462, 0.02204928, 0.0112055 ,\n",
" 0.02012314, 0.02579567, 0.10175219, 0.01705021, 0.02800238,\n",
" 0.01737879, 0.02852154, 0.02830964, 0.07086997, 0.00810061,\n",
" 0.01055116, 0.00850316, 0.01633823, 0.01982299, 0.02331168,\n",
" 0.02123152, 0.01332181, 0.01845547, 0.06057018, 0.02789967,\n",
" 0.01663426, 0.01731678, 0.0133699 , 0.03644281, 0.0274727 ,\n",
" 0.00659696, 0.03239437, 0.00291172, 0.01773524],\n",
" [0.03421254, 0.02013085, 0.0114226 , 0.0293474 , 0.00561449,\n",
" 0.02773003, 0.02203436, 0.01714355, 0.0135749 , 0.00134873,\n",
" 0.01190287, 0.00472081, 0.02888797, 0.02196991, 0.06368308,\n",
" 0.01899323, 0.00493306, 0.04940594, 0.02329974, 0.06127776,\n",
" 0.06564107, 0.00694048, 0.00687806, 0.06305144, 0.03059064,\n",
" 0.02712587, 0.02322724, 0.00428418, 0.01242838, 0.01857104,\n",
" 0.02862752, 0.02607098, 0.03388127, 0.00557591, 0.04088062,\n",
" 0.03383657, 0.05129543, 0.02336025, 0.02609922]])</pre></div></div></li><li class='xr-section-item'><input id='section-1c2d17cc-0193-46a9-8a8e-dc07c454bac2' class='xr-section-summary-in' type='checkbox' checked><label for='section-1c2d17cc-0193-46a9-8a8e-dc07c454bac2' class='xr-section-summary' >Coordinates: <span>(3)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>chain</span></div><div class='xr-var-dims'>(chain)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>0 1</div><input id='attrs-291aa086-cc2f-400c-9713-88d9905dbb82' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-291aa086-cc2f-400c-9713-88d9905dbb82' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-897cafb4-cca5-482f-a11d-7d56f0518278' class='xr-var-data-in' type='checkbox'><label for='data-897cafb4-cca5-482f-a11d-7d56f0518278' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([0, 1])</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span>draw</span></div><div class='xr-var-dims'>()</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>38</div><input id='attrs-8acdfef1-9948-4cfe-b3e9-20333bbe6740' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-8acdfef1-9948-4cfe-b3e9-20333bbe6740' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-7128c9fc-d03c-4b71-9bc8-733cffbef142' class='xr-var-data-in' type='checkbox'><label for='data-7128c9fc-d03c-4b71-9bc8-733cffbef142' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array(38)</pre></div></li><li class='xr-var-item'><div class='xr-var-name'><span class='xr-has-index'>theta_dim_0</span></div><div class='xr-var-dims'>(theta_dim_0)</div><div class='xr-var-dtype'>int64</div><div class='xr-var-preview xr-preview'>0 1 2 3 4 5 6 ... 33 34 35 36 37 38</div><input id='attrs-5856a7e6-0f5e-41bb-9f89-bf74ede30176' class='xr-var-attrs-in' type='checkbox' disabled><label for='attrs-5856a7e6-0f5e-41bb-9f89-bf74ede30176' title='Show/Hide attributes'><svg class='icon xr-icon-file-text2'><use xlink:href='#icon-file-text2'></use></svg></label><input id='data-60c9c8f2-48fa-4820-a524-58d5b3d91d1f' class='xr-var-data-in' type='checkbox'><label for='data-60c9c8f2-48fa-4820-a524-58d5b3d91d1f' title='Show/Hide data repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-var-attrs'><dl class='xr-attrs'></dl></div><div class='xr-var-data'><pre>array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\n",
" 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,\n",
" 36, 37, 38])</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-0d210ab2-fa65-439f-8ac6-8a79e3aed240' class='xr-section-summary-in' type='checkbox' ><label for='section-0d210ab2-fa65-439f-8ac6-8a79e3aed240' class='xr-section-summary' >Indexes: <span>(2)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><ul class='xr-var-list'><li class='xr-var-item'><div class='xr-index-name'><div>chain</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-bfefa97e-3872-42df-8e10-f22e8b563e67' class='xr-index-data-in' type='checkbox'/><label for='index-bfefa97e-3872-42df-8e10-f22e8b563e67' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Int64Index([0, 1], dtype=&#x27;int64&#x27;, name=&#x27;chain&#x27;))</pre></div></li><li class='xr-var-item'><div class='xr-index-name'><div>theta_dim_0</div></div><div class='xr-index-preview'>PandasIndex</div><div></div><input id='index-5a7b23f4-319c-4cf0-8a53-4e0af8d2965a' class='xr-index-data-in' type='checkbox'/><label for='index-5a7b23f4-319c-4cf0-8a53-4e0af8d2965a' title='Show/Hide index repr'><svg class='icon xr-icon-database'><use xlink:href='#icon-database'></use></svg></label><div class='xr-index-data'><pre>PandasIndex(Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
" 34, 35, 36, 37, 38],\n",
" dtype=&#x27;int64&#x27;, name=&#x27;theta_dim_0&#x27;))</pre></div></li></ul></div></li><li class='xr-section-item'><input id='section-52c9221b-7e6c-42ad-86a8-d6be67dc14ec' class='xr-section-summary-in' type='checkbox' disabled ><label for='section-52c9221b-7e6c-42ad-86a8-d6be67dc14ec' class='xr-section-summary' title='Expand/collapse section'>Attributes: <span>(0)</span></label><div class='xr-section-inline-details'></div><div class='xr-section-details'><dl class='xr-attrs'></dl></div></li></ul></div></div>"
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"source": [
"phoneme_set = set(phoneme for phoneme_seq in phoneme_dict.values() for phoneme in phoneme_seq)\n",
"phoneme_indices = {phoneme: i for i, phoneme in enumerate(phoneme_set)}\n",
"\n",
"# Convert phonemes to integer indices\n",
"phoneme_dict_int = {\n",
" word: [phoneme_indices[phoneme] for phoneme in phonemes]\n",
" for word, phonemes in phoneme_dict.items()\n",
"}\n",
"for word, phonemes in phoneme_dict_int.items():\n",
" phoneme_probs[word] = np.mean([np.prod(trace.posterior.theta[:, phoneme]) for phoneme in phonemes])\n"
],
"metadata": {
"id": "vxhpGspiQwkA"
},
"execution_count": 27,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def grade_paragraph(paragraph):\n",
" words = paragraph.split()\n",
" word_probs = [phoneme_probs[word] for word in words if word in phoneme_probs]\n",
" print(word_probs)\n",
" if len(word_probs) > 0:\n",
" complexity_score = np.mean(word_probs)\n",
" else:\n",
" complexity_score = 0\n",
" return complexity_score\n",
"\n",
"with open('word_complexity.csv', 'w') as f:\n",
" for word, complexity_score in phoneme_probs.items():\n",
" f.write('{},{}\\n'.format(word, complexity_score))"
],
"metadata": {
"id": "pU10m7x2NBiJ"
},
"execution_count": 37,
"outputs": []
},
{
"cell_type": "code",
"source": [
"paragraph = \"There is a well in the village. There is also a pond. Cows, bulls, buffalos, horses, donkeys, dogs and goats drink water from the pond. They sometimes have a bath in it. You can also see many cats in the lanes. They like to drink milk and chase mice.\""
],
"metadata": {
"id": "OYOYTsOPUnli"
},
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"source": [
"grade_paragraph(paragraph)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "qGCSHr59NEsm",
"outputId": "52595904-d16c-48f8-8588-d5d431079a56"
},
"execution_count": 39,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[9.2845983573296e-133, 2.7186232337646675e-132, 1.0032740236021434e-135, 8.339816066898933e-133, 9.499390730469265e-133, 9.2845983573296e-133, 1.6760150427710444e-134, 2.7186232337646675e-132, 7.821208648712397e-134, 1.1861918875973315e-132, 6.88583332191121e-133, 1.2410362436430073e-133, 7.796415550600236e-133, 1.0912998969335796e-133, 9.499390730469265e-133, 8.06716725212648e-133, 7.618929031308053e-133, 2.7186232337646675e-132, 8.792881812051788e-133, 8.339816066898933e-133, 1.3326000092015514e-132, 1.6760150427710444e-134, 6.112587703035012e-134, 4.917774930995188e-134, 1.2509511887670658e-132, 8.339816066898933e-133, 9.499390730469265e-133, 1.6561120020092671e-133, 2.7796759547796124e-132, 1.2410362436430073e-133, 1.4304785147703589e-133, 1.1861918875973315e-132, 8.3227202828127e-134]\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"8.481378126468953e-133"
]
},
"metadata": {},
"execution_count": 39
}
]
},
{
"cell_type": "code",
"source": [
"grade_paragraph(\"buffalos\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LFnoNShSUxBM",
"outputId": "64baa1c8-fb4c-4b56-fc0a-1560f9ceb788"
},
"execution_count": 36,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"9.212103441134748e-133"
]
},
"metadata": {},
"execution_count": 36
}
]
},
{
"cell_type": "code",
"source": [
"grade_paragraph(\"village\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "REIIAVX9U8vi",
"outputId": "0ce794b3-1ec9-4774-f332-89f4208629ea"
},
"execution_count": 34,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1.1145970216057637e-132"
]
},
"metadata": {},
"execution_count": 34
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "RVkdQIJzVDfe"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment