Skip to content

Instantly share code, notes, and snippets.

@Manikanta-Munnangi
Created November 18, 2019 14:11
Show Gist options
  • Save Manikanta-Munnangi/d621de9ec94b8e5033ec53d7d5c246f8 to your computer and use it in GitHub Desktop.
Save Manikanta-Munnangi/d621de9ec94b8e5033ec53d7d5c246f8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# spacy import convention\n",
"import spacy\n",
"\n",
"# load the english model into nlp object.\n",
"nlp=spacy.load('en_core_web_sm') "
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Sample text\n",
"text=\"Tesla founder Elon Musk has launched tech startup Neuralink.\"\n",
"\n",
"# returns doc container with attributes \n",
"doc=nlp(text)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# loop through doc\n",
"# return tokens with lemmas_ (actual english language word)\n",
"tokenized=[(token.text,token.dep_) for token in doc]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('Tesla', 'compound'),\n",
" ('founder', 'compound'),\n",
" ('Elon', 'compound'),\n",
" ('Musk', 'nsubj'),\n",
" ('has', 'aux'),\n",
" ('launched', 'ROOT'),\n",
" ('tech', 'compound'),\n",
" ('startup', 'compound'),\n",
" ('Neuralink', 'dobj'),\n",
" ('.', 'punct')]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# print result.\n",
"tokenized"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'auxiliary'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# But what 'aux' means, the output of has.use spacy.explain command\n",
"spacy.explain('aux')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- `You can find words which you don't understand using 'explain' function.`"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"f0dd00b65a6e4a7a85581e1b71943e40-0\" class=\"displacy\" width=\"1625\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">Tesla</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">ADJ</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">founder</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">NOUN</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">Elon</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">PROPN</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">Musk</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">PROPN</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">has</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">AUX</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">launched</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">VERB</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">tech</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">NOUN</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">startup</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">NOUN</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">Neuralink.</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\">PROPN</tspan>\n",
"</text>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-1\" stroke-width=\"2px\" d=\"M245,264.5 C245,89.5 570.0,89.5 570.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M245,266.5 L237,254.5 253,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-2\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-3\" stroke-width=\"2px\" d=\"M595,264.5 C595,89.5 920.0,89.5 920.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M595,266.5 L587,254.5 603,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-4\" stroke-width=\"2px\" d=\"M770,264.5 C770,177.0 915.0,177.0 915.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">aux</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M770,266.5 L762,254.5 778,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-5\" stroke-width=\"2px\" d=\"M1120,264.5 C1120,89.5 1445.0,89.5 1445.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M1120,266.5 L1112,254.5 1128,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-6\" stroke-width=\"2px\" d=\"M1295,264.5 C1295,177.0 1440.0,177.0 1440.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M1295,266.5 L1287,254.5 1303,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-7\" stroke-width=\"2px\" d=\"M945,264.5 C945,2.0 1450.0,2.0 1450.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-f0dd00b65a6e4a7a85581e1b71943e40-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M1450.0,266.5 L1458.0,254.5 1442.0,254.5\" fill=\"currentColor\"/>\n",
"</g>\n",
"</svg>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Rather than see in bare text visualize it as a graph with displacy.\n",
"from spacy import displacy\n",
"\n",
"doc=nlp(text)\n",
"# specifying style is dep will render dependency_parser.\n",
"displacy.render(doc,style='dep',manual=False) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- `It's vaild on doc or span objects only not on results like tokenized which is a list.`"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# you can access head and multiple childrens.\n",
"family_tokens=[(token.text,token.head) for token in doc]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('Tesla', founder),\n",
" ('founder', Musk),\n",
" ('Elon', Musk),\n",
" ('Musk', launched),\n",
" ('has', launched),\n",
" ('launched', launched),\n",
" ('tech', Neuralink),\n",
" ('startup', Neuralink),\n",
" ('Neuralink', launched),\n",
" ('.', launched)]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"family_tokens"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment