Created
March 5, 2019 11:24
-
-
Save reflash/d443897f78afdbc99a87667e8bf37697 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: spacy in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (2.0.18)\n", | |
"Requirement already satisfied: numpy>=1.15.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (1.16.0)\n", | |
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (1.0.1)\n", | |
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2.0.2)\n", | |
"Requirement already satisfied: preshed<2.1.0,>=2.0.1 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2.0.1)\n", | |
"Requirement already satisfied: thinc<6.13.0,>=6.12.1 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (6.12.1)\n", | |
"Requirement already satisfied: plac<1.0.0,>=0.9.6 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (0.9.6)\n", | |
"Requirement already satisfied: ujson>=1.35 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (1.35)\n", | |
"Requirement already satisfied: dill<0.3,>=0.2 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (0.2.9)\n", | |
"Requirement already satisfied: regex==2018.01.10 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2018.1.10)\n", | |
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2.21.0)\n", | |
"Requirement already satisfied: msgpack<0.6.0,>=0.5.6 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (0.5.6)\n", | |
"Requirement already satisfied: msgpack-numpy<0.4.4 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (0.4.3.2)\n", | |
"Requirement already satisfied: cytoolz<0.10,>=0.9.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (0.9.0.1)\n", | |
"Requirement already satisfied: wrapt<1.11.0,>=1.10.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (1.10.11)\n", | |
"Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (4.29.1)\n", | |
"Requirement already satisfied: six<2.0.0,>=1.10.0 in c:\\users\\daniil_ekzarian\\appdata\\roaming\\python\\python37\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (1.12.0)\n", | |
"Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.8)\n", | |
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)\n", | |
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.1)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2018.11.29)\n", | |
"Requirement already satisfied: toolz>=0.8.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from cytoolz<0.10,>=0.9.0->thinc<6.13.0,>=6.12.1->spacy) (0.9.0)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"You are using pip version 18.1, however version 19.0.3 is available.\n", | |
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: en_core_web_sm==2.0.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz#egg=en_core_web_sm==2.0.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (2.0.0)\n", | |
"\n", | |
" Linking successful\n", | |
" c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\en_core_web_sm\n", | |
" -->\n", | |
" c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\spacy\\data\\en\n", | |
"\n", | |
" You can now load the model via spacy.load('en')\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"You are using pip version 18.1, however version 19.0.3 is available.\n", | |
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n", | |
"You do not have sufficient privilege to perform this operation.\n" | |
] | |
} | |
], | |
"source": [ | |
"import sys\n", | |
"\n", | |
"# Install spaCy into the interpreter that backs this kernel.\n", | |
"!{sys.executable} -m pip install spacy\n", | |
"\n", | |
"# Fetch the model under its full package name. The `en` shortcut additionally\n", | |
"# tries to create a link inside spacy/data, which needs elevated privileges on\n", | |
"# Windows; the model is loaded below as 'en_core_web_sm', so no link is required.\n", | |
"!{sys.executable} -m spacy download en_core_web_sm" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" id=\"521-0\" class=\"displacy\" width=\"750\" height=\"312.0\" style=\"max-width: none; height: 312.0px; color: #000000; background: #ffffff; font-family: Arial\">\n", | |
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n", | |
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">This</tspan>\n", | |
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">DET</tspan>\n", | |
"</text>\n", | |
"\n", | |
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n", | |
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">is</tspan>\n", | |
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VERB</tspan>\n", | |
"</text>\n", | |
"\n", | |
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n", | |
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">a</tspan>\n", | |
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">DET</tspan>\n", | |
"</text>\n", | |
"\n", | |
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n", | |
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">sentence.</tspan>\n", | |
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">NOUN</tspan>\n", | |
"</text>\n", | |
"\n", | |
"<g class=\"displacy-arrow\">\n", | |
" <path class=\"displacy-arc\" id=\"arrow-521-0-0\" stroke-width=\"2px\" d=\"M70,177.0 C70,89.5 220.0,89.5 220.0,177.0\" fill=\"none\" stroke=\"currentColor\"/>\n", | |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", | |
" <textPath xlink:href=\"#arrow-521-0-0\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n", | |
" </text>\n", | |
" <path class=\"displacy-arrowhead\" d=\"M70,179.0 L62,167.0 78,167.0\" fill=\"currentColor\"/>\n", | |
"</g>\n", | |
"\n", | |
"<g class=\"displacy-arrow\">\n", | |
" <path class=\"displacy-arc\" id=\"arrow-521-0-1\" stroke-width=\"2px\" d=\"M420,177.0 C420,89.5 570.0,89.5 570.0,177.0\" fill=\"none\" stroke=\"currentColor\"/>\n", | |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", | |
" <textPath xlink:href=\"#arrow-521-0-1\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n", | |
" </text>\n", | |
" <path class=\"displacy-arrowhead\" d=\"M420,179.0 L412,167.0 428,167.0\" fill=\"currentColor\"/>\n", | |
"</g>\n", | |
"\n", | |
"<g class=\"displacy-arrow\">\n", | |
" <path class=\"displacy-arc\" id=\"arrow-521-0-2\" stroke-width=\"2px\" d=\"M245,177.0 C245,2.0 575.0,2.0 575.0,177.0\" fill=\"none\" stroke=\"currentColor\"/>\n", | |
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", | |
" <textPath xlink:href=\"#arrow-521-0-2\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">attr</textPath>\n", | |
" </text>\n", | |
" <path class=\"displacy-arrowhead\" d=\"M575.0,179.0 L583.0,167.0 567.0,167.0\" fill=\"currentColor\"/>\n", | |
"</g>\n", | |
"</svg>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import spacy\n", | |
"from spacy import displacy\n", | |
"\n", | |
"# Sentence used for the dependency-parse demo.\n", | |
"SAMPLE_SENTENCE = u'This is a sentence.'\n", | |
"\n", | |
"# Load the small English pipeline; the cells below reuse `nlp`.\n", | |
"nlp = spacy.load('en_core_web_sm')\n", | |
"\n", | |
"# Parse the sentence and draw its dependency arcs inline.\n", | |
"doc = nlp(SAMPLE_SENTENCE)\n", | |
"displacy.render(doc, jupyter=True, style='dep')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div class=\"entities\" style=\"line-height: 2.5\">But \n", | |
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" Google\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", | |
"</mark>\n", | |
" is starting from behind. The company made a late push\n", | |
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" \n", | |
"\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n", | |
"</mark>\n", | |
"into hardware, and \n", | |
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" Apple\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", | |
"</mark>\n", | |
"’s Siri, available on \n", | |
"<mark class=\"entity\" style=\"background: #bfeeb7; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" iPhones\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">PRODUCT</span>\n", | |
"</mark>\n", | |
", and \n", | |
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" Amazon\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", | |
"</mark>\n", | |
"’s \n", | |
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" Alexa\n", | |
"\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", | |
"</mark>\n", | |
"software, which runs on its \n", | |
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" Echo\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n", | |
"</mark>\n", | |
" and \n", | |
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" Dot\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n", | |
"</mark>\n", | |
" devices, have clear leads in\n", | |
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n", | |
" \n", | |
"\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n", | |
"</mark>\n", | |
"consumer adoption.</div>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"text = \"\"\"But Google is starting from behind. The company made a late push\n", | |
"into hardware, and Apple’s Siri, available on iPhones, and Amazon’s Alexa\n", | |
"software, which runs on its Echo and Dot devices, have clear leads in\n", | |
"consumer adoption.\"\"\"\n", | |
"\n", | |
"# The excerpt is hard-wrapped. Collapse every whitespace run (line breaks\n", | |
"# included) to one space before parsing; otherwise each line break becomes its\n", | |
"# own token and gets highlighted as an empty entity in the rendering.\n", | |
"doc = nlp(' '.join(text.split()))\n", | |
"displacy.render(doc, style='ent', jupyter=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"dog dog 1.0\n", | |
"dog cat 0.53906965\n", | |
"dog banana 0.28761005\n", | |
"cat dog 0.53906965\n", | |
"cat cat 1.0\n", | |
"cat banana 0.48752162\n", | |
"banana dog 0.28761005\n", | |
"banana cat 0.48752162\n", | |
"banana banana 1.0\n" | |
] | |
} | |
], | |
"source": [ | |
"tokens = nlp(u'dog cat banana')\n", | |
"\n", | |
"# Print the similarity score for every ordered pair of tokens, self-pairs included.\n", | |
"# NOTE(review): spaCy's small (`sm`) models are documented as shipping without\n", | |
"# word vectors, so these scores may be less meaningful than with a md/lg model.\n", | |
"for token1 in tokens:\n", | |
"    for token2 in tokens:\n", | |
"        print(token1.text, token2.text, token1.similarity(token2))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment