Skip to content

Instantly share code, notes, and snippets.

@Manikanta-Munnangi
Created November 18, 2019 16:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Manikanta-Munnangi/943666ad279c97549654718a14657c80 to your computer and use it in GitHub Desktop.
Save Manikanta-Munnangi/943666ad279c97549654718a14657c80 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# spacy import convention\n",
"import spacy\n",
"\n",
"# load the english model into nlp object.\n",
"nlp=spacy.load('en_core_web_md') "
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-2.3205e-01, 4.7468e-01, -3.8264e-01, 2.2248e-03, -1.0493e-01,\n",
" 1.1612e-01, -5.0251e-02, 1.2927e-01, 8.7639e-02, 2.6391e+00,\n",
" -3.7071e-01, -2.9460e-01, -1.2722e-01, -3.7028e-02, -1.3964e-01,\n",
" -9.8518e-02, -2.2704e-01, 1.2254e+00, -4.3827e-01, -4.2383e-01,\n",
" 4.9285e-01, -2.3314e-01, 9.7892e-02, -2.7542e-01, -2.6583e-01,\n",
" 1.4518e-01, -1.0652e-02, 1.1067e-01, 1.6126e-01, -4.2688e-01,\n",
" -3.0968e-01, 1.2774e-01, 9.5535e-02, -5.0221e-02, 2.6677e-01,\n",
" 1.4821e-01, 1.7805e-01, 8.8508e-02, -3.6138e-01, -2.1068e-01,\n",
" -2.6420e-01, -3.0030e-01, 1.7674e-01, -2.0741e-01, 2.3360e-01,\n",
" 2.6663e-02, -2.7939e-01, -1.4522e-02, -3.0973e-02, 4.3729e-02,\n",
" -3.0191e-01, 9.3855e-02, -2.0135e-01, -1.4267e-01, 3.3124e-01,\n",
" 9.8036e-02, 3.6001e-02, -5.7850e-02, 1.2101e-01, 4.1569e-02,\n",
" -3.8834e-02, -6.6843e-02, -2.4738e-01, 1.4838e-01, 4.1011e-01,\n",
" -3.0283e-01, 9.4704e-02, 3.2583e-01, 3.1955e-01, -4.3357e-02,\n",
" 2.0606e-01, -9.6981e-02, 4.5202e-01, -2.4532e-01, 2.6683e-01,\n",
" 2.6141e-01, 1.2174e-01, -3.9941e-01, -1.2916e-01, 2.2634e-01,\n",
" 1.2218e-01, 1.6536e-01, -5.2466e-02, -8.6235e-02, 1.2232e-02,\n",
" -3.9760e-01, -2.5987e-01, -6.4113e-01, 2.4669e-01, 2.9575e-02,\n",
" -2.9733e-01, -2.5994e-01, -6.1239e-01, 3.2332e-01, 2.2375e-01,\n",
" 2.1916e-01, -1.0905e-02, -7.2927e-02, -3.2219e-01, -6.5715e-02,\n",
" -1.7247e-01, 4.0714e-02, 1.6625e-01, -1.8120e-01, -1.8531e-01,\n",
" -1.1575e+00, -4.7285e-02, 2.4866e-02, 4.0405e-03, -6.1920e-02,\n",
" 8.7754e-02, -4.1669e-01, 8.6682e-02, -3.7720e-01, 1.6166e-01,\n",
" -1.2879e-01, -1.6494e-01, -1.1212e-02, -1.4810e-01, 9.9342e-02,\n",
" 1.5603e-01, -2.8030e-01, -9.5092e-02, 7.7952e-02, 8.8172e-02,\n",
" 2.2930e-01, -1.0321e-01, -3.8966e-01, 1.9519e-01, -8.7815e-02,\n",
" -1.5861e-01, 1.1627e-01, 8.8138e-02, 1.1262e-01, 1.8212e-01,\n",
" 1.8005e-02, -5.5187e-02, -3.8818e-02, 1.6536e-01, -2.5814e-01,\n",
" -1.8516e+00, -2.9996e-01, 3.3106e-02, 3.2293e-01, -1.6417e-01,\n",
" -2.7445e-01, 5.1582e-02, 3.4203e-01, -4.3025e-01, -4.5816e-02,\n",
" 2.3542e-01, 1.8271e-01, -8.9827e-02, -2.0280e-02, -1.0056e-02,\n",
" -7.5604e-02, 1.5922e-02, 1.5616e-01, -3.8949e-01, -5.8165e-02,\n",
" -4.3763e-01, 2.4587e-01, -2.3169e-01, -1.4508e-01, 3.5845e-01,\n",
" 1.2437e-01, 2.2588e-01, -1.8963e-02, 7.9287e-02, 1.6775e-01,\n",
" -1.2729e-01, -3.2950e-01, 3.1048e-01, -1.6959e-01, 5.7082e-02,\n",
" -9.8536e-02, -1.1715e-02, 3.9690e-01, 1.0493e-01, 1.9083e-01,\n",
" 1.3871e-01, -1.8307e-02, -7.8323e-02, -4.5149e-02, 6.6471e-02,\n",
" 1.7835e-01, -4.3998e-02, -1.9136e-01, -8.8387e-02, 4.2414e-01,\n",
" 1.1562e-01, 8.0458e-02, -1.0350e-01, -1.8200e-01, -2.0045e-01,\n",
" 1.9755e-01, 3.8457e-02, -1.1081e-01, 2.2978e-01, 3.5781e-01,\n",
" -1.6376e-01, -2.3062e-01, -2.4412e-01, -7.3929e-02, -1.2747e-01,\n",
" 1.4730e-01, 2.5954e-01, 1.8571e-01, 2.7923e-01, 1.8186e-01,\n",
" -1.4550e-01, -2.5523e-01, -2.3418e-01, -2.3684e-01, 5.7909e-02,\n",
" 1.3913e-01, -1.4280e-01, 9.8092e-02, -2.4884e-01, -2.2587e-01,\n",
" 2.2812e-01, 2.3718e-01, -6.6049e-02, 6.3126e-02, -3.4434e-03,\n",
" 2.6542e-01, -4.3094e-02, 9.1002e-02, -2.9563e-02, 1.3626e-01,\n",
" -2.2368e-01, 1.4869e-01, 1.7428e-02, 2.6551e-01, -2.0984e-01,\n",
" -1.6786e-01, 2.1192e-01, 1.2735e-01, 1.6441e-01, 3.3131e-01,\n",
" 1.0661e-01, -2.1155e-01, 2.8474e-02, -9.9419e-02, 3.4635e-01,\n",
" -4.0166e-01, -1.9083e-01, -2.8156e-01, -8.1996e-02, 2.4322e-01,\n",
" 3.0341e-01, -1.4984e-01, -2.9952e-01, -2.8089e-01, -8.2551e-02,\n",
" -3.5457e-01, 8.3108e-02, 7.3193e-02, 5.8555e-02, 4.7347e-02,\n",
" 3.3200e-01, 1.5465e-01, -6.5075e-02, 6.3738e-03, 2.6690e-01,\n",
" -3.3819e-01, -2.1204e-01, 2.2368e-01, 6.2783e-01, 7.0440e-01,\n",
" -2.2196e-01, -1.0377e-01, 6.9900e-02, -1.3201e-01, -2.6255e-01,\n",
" -1.9671e-02, -1.1906e-01, 3.2839e-02, -3.1207e-02, 2.5083e-01,\n",
" -1.4702e-01, 4.4411e-01, -2.1465e-01, 4.5018e-02, -1.4012e-01,\n",
" 4.6586e-02, 2.4790e-01, -1.3205e-01, 1.4456e-01, -1.8638e-01,\n",
" -8.6773e-02, 1.3312e-01, 1.8741e-03, 4.4091e-02, 2.8882e-01,\n",
" -9.0016e-02, -1.8108e-01, 3.3178e-01, 3.1545e-01, 3.7972e-01],\n",
" dtype=float32)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# can access vector from of as word\n",
"nlp.vocab[\"how\"].vector"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check for other word that don't have vector form \n",
"nlp.vocab[\"jupyter\"].vector"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- `so before preprocessing you need to make sure the word has vector form.`"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(True, False)"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check with existence of vector with .vector_form attribute\n",
"nlp.vocab[\"how\"].has_vector, nlp.vocab[\"jupyter\"].has_vector"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Two Sample sentences\n",
"text=\"how are you\"\n",
"text1=\"how you doing\"\n",
"\n",
"# returns doc containers \n",
"doc=nlp(text)\n",
"doc1=nlp(text1)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9106663802758767"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# similarity between sentences\n",
"doc.similarity(doc1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment