Skip to content

Instantly share code, notes, and snippets.

@reflash
Created March 5, 2019 11:24
Show Gist options
  • Save reflash/d443897f78afdbc99a87667e8bf37697 to your computer and use it in GitHub Desktop.
Save reflash/d443897f78afdbc99a87667e8bf37697 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: spacy in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (2.0.18)\n",
"Requirement already satisfied: numpy>=1.15.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (1.16.0)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (1.0.1)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2.0.2)\n",
"Requirement already satisfied: preshed<2.1.0,>=2.0.1 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2.0.1)\n",
"Requirement already satisfied: thinc<6.13.0,>=6.12.1 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (6.12.1)\n",
"Requirement already satisfied: plac<1.0.0,>=0.9.6 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (0.9.6)\n",
"Requirement already satisfied: ujson>=1.35 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (1.35)\n",
"Requirement already satisfied: dill<0.3,>=0.2 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (0.2.9)\n",
"Requirement already satisfied: regex==2018.01.10 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2018.1.10)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from spacy) (2.21.0)\n",
"Requirement already satisfied: msgpack<0.6.0,>=0.5.6 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (0.5.6)\n",
"Requirement already satisfied: msgpack-numpy<0.4.4 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (0.4.3.2)\n",
"Requirement already satisfied: cytoolz<0.10,>=0.9.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (0.9.0.1)\n",
"Requirement already satisfied: wrapt<1.11.0,>=1.10.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (1.10.11)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (4.29.1)\n",
"Requirement already satisfied: six<2.0.0,>=1.10.0 in c:\\users\\daniil_ekzarian\\appdata\\roaming\\python\\python37\\site-packages (from thinc<6.13.0,>=6.12.1->spacy) (1.12.0)\n",
"Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.8)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.0.4)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (1.24.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2018.11.29)\n",
"Requirement already satisfied: toolz>=0.8.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from cytoolz<0.10,>=0.9.0->thinc<6.13.0,>=6.12.1->spacy) (0.9.0)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"You are using pip version 18.1, however version 19.0.3 is available.\n",
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: en_core_web_sm==2.0.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz#egg=en_core_web_sm==2.0.0 in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (2.0.0)\n",
"\n",
" Linking successful\n",
" c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\en_core_web_sm\n",
" -->\n",
" c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\spacy\\data\\en\n",
"\n",
" You can now load the model via spacy.load('en')\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"You are using pip version 18.1, however version 19.0.3 is available.\n",
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n",
"You do not have sufficient privilege to perform this operation.\n"
]
}
],
"source": [
"import sys\n",
"!{sys.executable} -m pip install spacy\n",
"!{sys.executable} -m spacy download en"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" id=\"521-0\" class=\"displacy\" width=\"750\" height=\"312.0\" style=\"max-width: none; height: 312.0px; color: #000000; background: #ffffff; font-family: Arial\">\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">This</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">DET</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">is</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VERB</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">a</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">DET</tspan>\n",
"</text>\n",
"\n",
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"222.0\">\n",
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">sentence.</tspan>\n",
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">NOUN</tspan>\n",
"</text>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-521-0-0\" stroke-width=\"2px\" d=\"M70,177.0 C70,89.5 220.0,89.5 220.0,177.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-521-0-0\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M70,179.0 L62,167.0 78,167.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-521-0-1\" stroke-width=\"2px\" d=\"M420,177.0 C420,89.5 570.0,89.5 570.0,177.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-521-0-1\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M420,179.0 L412,167.0 428,167.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"\n",
"<g class=\"displacy-arrow\">\n",
" <path class=\"displacy-arc\" id=\"arrow-521-0-2\" stroke-width=\"2px\" d=\"M245,177.0 C245,2.0 575.0,2.0 575.0,177.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
" <textPath xlink:href=\"#arrow-521-0-2\" class=\"displacy-label\" startOffset=\"50%\" fill=\"currentColor\" text-anchor=\"middle\">attr</textPath>\n",
" </text>\n",
" <path class=\"displacy-arrowhead\" d=\"M575.0,179.0 L583.0,167.0 567.0,167.0\" fill=\"currentColor\"/>\n",
"</g>\n",
"</svg>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import spacy\n",
"from spacy import displacy\n",
"\n",
"nlp = spacy.load('en_core_web_sm')\n",
"doc = nlp(u'This is a sentence.')\n",
"displacy.render(doc, style='dep', jupyter=True)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div class=\"entities\" style=\"line-height: 2.5\">But \n",
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" Google\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
"</mark>\n",
" is starting from behind. The company made a late push\n",
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" \n",
"\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
"</mark>\n",
"into hardware, and \n",
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" Apple\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
"</mark>\n",
"’s Siri, available on \n",
"<mark class=\"entity\" style=\"background: #bfeeb7; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" iPhones\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">PRODUCT</span>\n",
"</mark>\n",
", and \n",
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" Amazon\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
"</mark>\n",
"’s \n",
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" Alexa\n",
"\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
"</mark>\n",
"software, which runs on its \n",
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" Echo\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
"</mark>\n",
" and \n",
"<mark class=\"entity\" style=\"background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" Dot\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">ORG</span>\n",
"</mark>\n",
" devices, have clear leads in\n",
"<mark class=\"entity\" style=\"background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone\">\n",
" \n",
"\n",
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem\">GPE</span>\n",
"</mark>\n",
"consumer adoption.</div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"text = \"\"\"But Google is starting from behind. The company made a late push\n",
"into hardware, and Apple’s Siri, available on iPhones, and Amazon’s Alexa\n",
"software, which runs on its Echo and Dot devices, have clear leads in\n",
"consumer adoption.\"\"\"\n",
"\n",
"doc = nlp(text)\n",
"displacy.render(doc, style='ent', jupyter=True)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dog dog 1.0\n",
"dog cat 0.53906965\n",
"dog banana 0.28761005\n",
"cat dog 0.53906965\n",
"cat cat 1.0\n",
"cat banana 0.48752162\n",
"banana dog 0.28761005\n",
"banana cat 0.48752162\n",
"banana banana 1.0\n"
]
}
],
"source": [
"tokens = nlp(u'dog cat banana')\n",
"\n",
"for token1 in tokens:\n",
" for token2 in tokens:\n",
" print(token1.text, token2.text, token1.similarity(token2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment