Skip to content

Instantly share code, notes, and snippets.

@shihono
Created June 23, 2024 08:27
Show Gist options
  • Save shihono/25b798adc26629b7a2688b51a1ef1480 to your computer and use it in GitHub Desktop.
Save shihono/25b798adc26629b7a2688b51a1ef1480 to your computer and use it in GitHub Desktop.
spacy_morph_ja.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"toc_visible": true,
"authorship_tag": "ABX9TyPU05TbyMRKrNecGvlJVd52",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/shihono/25b798adc26629b7a2688b51a1ef1480/spacy_morph_ja.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [
"# spaCy morph\n",
"\n",
"spaCy の日本語モデルで `token.morph` が返す内容を確認する。"
],
"metadata": {
"id": "GZd-xoIh7uIE"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4V2NVWnkaSl1",
"outputId": "08204326-d5d2-45f8-ede1-344da6d5a3fa"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: spacy in /usr/local/lib/python3.10/dist-packages (3.7.5)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.12)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.5)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.0.10)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.8)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.0.9)\n",
"Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy) (8.2.4)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.1.3)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.4.8)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.0.10)\n",
"Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (0.4.1)\n",
"Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (0.12.3)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (4.66.4)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.31.0)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy) (2.7.4)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.1.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy) (67.7.2)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (24.1)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (3.4.0)\n",
"Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy) (1.25.2)\n",
"Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.18.4)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy) (2024.6.2)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.5)\n",
"Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (8.1.7)\n",
"Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (1.5.4)\n",
"Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy) (13.7.1)\n",
"Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (0.18.1)\n",
"Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy) (7.0.4)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy) (2.1.5)\n",
"Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy) (1.2.0)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (2.16.1)\n",
"Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy) (1.14.1)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (0.1.2)\n",
"Collecting en-core-web-sm==3.7.1\n",
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: spacy<3.8.0,>=3.7.2 in /usr/local/lib/python3.10/dist-packages (from en-core-web-sm==3.7.1) (3.7.5)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)\n",
"Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.4)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.3)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)\n",
"Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.4.1)\n",
"Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.12.3)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.4)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.31.0)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.4)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (67.7.2)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.1)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.4.0)\n",
"Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.25.2)\n",
"Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.4)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2024.6.2)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.5)\n",
"Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)\n",
"Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.5.4)\n",
"Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (13.7.1)\n",
"Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.18.1)\n",
"Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (7.0.4)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)\n",
"Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.2.0)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.16.1)\n",
"Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.14.1)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.2)\n",
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
"You can now load the package via spacy.load('en_core_web_sm')\n",
"\u001b[38;5;3m⚠ Restart to reload dependencies\u001b[0m\n",
"If you are in a Jupyter or Colab notebook, you may need to restart Python in\n",
"order to load all the package's dependencies. You can do this by selecting the\n",
"'Restart kernel' or 'Restart runtime' option.\n",
"Collecting ja-core-news-sm==3.7.0\n",
" Downloading https://github.com/explosion/spacy-models/releases/download/ja_core_news_sm-3.7.0/ja_core_news_sm-3.7.0-py3-none-any.whl (12.1 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m33.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: spacy<3.8.0,>=3.7.0 in /usr/local/lib/python3.10/dist-packages (from ja-core-news-sm==3.7.0) (3.7.5)\n",
"Requirement already satisfied: sudachipy!=0.6.1,>=0.5.2 in /usr/local/lib/python3.10/dist-packages (from ja-core-news-sm==3.7.0) (0.6.8)\n",
"Requirement already satisfied: sudachidict-core>=20211220 in /usr/local/lib/python3.10/dist-packages (from ja-core-news-sm==3.7.0) (20240409)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.0.12)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.0.5)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.0.10)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.0.8)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.0.9)\n",
"Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (8.2.4)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.1.3)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.4.8)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.0.10)\n",
"Requirement already satisfied: weasel<0.5.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.4.1)\n",
"Requirement already satisfied: typer<1.0.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.12.3)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (4.66.4)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.31.0)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.7.4)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.1.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (67.7.2)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (24.1)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.4.0)\n",
"Requirement already satisfied: numpy>=1.19.0 in /usr/local/lib/python3.10/dist-packages (from spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.25.2)\n",
"Requirement already satisfied: language-data>=1.2 in /usr/local/lib/python3.10/dist-packages (from langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.2.0)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.18.4)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (4.12.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2024.6.2)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.7.11)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.1.5)\n",
"Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (8.1.7)\n",
"Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.5.4)\n",
"Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (13.7.1)\n",
"Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.18.1)\n",
"Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (7.0.4)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.1.5)\n",
"Requirement already satisfied: marisa-trie>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.2.0)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (2.16.1)\n",
"Requirement already satisfied: wrapt in /usr/local/lib/python3.10/dist-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (1.14.1)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<3.8.0,>=3.7.0->ja-core-news-sm==3.7.0) (0.1.2)\n",
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
"You can now load the package via spacy.load('ja_core_news_sm')\n",
"\u001b[38;5;3m⚠ Restart to reload dependencies\u001b[0m\n",
"If you are in a Jupyter or Colab notebook, you may need to restart Python in\n",
"order to load all the package's dependencies. You can do this by selecting the\n",
"'Restart kernel' or 'Restart runtime' option.\n"
]
}
],
"source": [
"# Install spaCy and the small English and Japanese pipelines.\n",
"# spaCy is pinned to the version this notebook was executed with so that a\n",
"# fresh run reproduces the recorded outputs; %pip (rather than !pip) installs\n",
"# into the environment of the running kernel.\n",
"\n",
"%pip install spacy==3.7.5\n",
"!python -m spacy download en_core_web_sm\n",
"!python -m spacy download ja_core_news_sm"
]
},
{
"cell_type": "code",
"source": [
"!pip list | grep spacy"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2my5Zu2Y8Ybp",
"outputId": "4580052f-c7ae-4b22-ce38-3655c2651bbe"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"spacy 3.7.5\n",
"spacy-legacy 3.0.12\n",
"spacy-loggers 1.0.5\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## English モデルの動作確認"
],
"metadata": {
"id": "MEhl3-CT72uJ"
}
},
{
"cell_type": "code",
"source": [
"import spacy\n",
"\n",
"# English pipeline (tokenizer, tagger, parser and NER).\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"# Sample paragraph, assembled from line-sized pieces.\n",
"text = \"\".join([\n",
"    \"When Sebastian Thrun started working on self-driving cars at \",\n",
"    \"Google in 2007, few people outside of the company took him \",\n",
"    \"seriously. “I can tell you very senior CEOs of major American \",\n",
"    \"car companies would shake my hand and turn away because I wasn’t \",\n",
"    \"worth talking to,” said Thrun, in an interview with Recode earlier \",\n",
"    \"this week.\",\n",
"])\n",
"doc = nlp(text)\n",
"\n",
"# One row per token: surface text, coarse POS, fine-grained tag and the\n",
"# morphological features as a plain dict.\n",
"for token in doc:\n",
"    morph_features = token.morph.to_dict()\n",
"    print(token.text, token.pos_, token.tag_, morph_features)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "IObnSA0Fn1-B",
"outputId": "a47a870e-e592-4b11-e9f0-98231a0796b7"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"When SCONJ WRB {}\n",
"Sebastian ADJ JJ {'Degree': 'Pos'}\n",
"Thrun PROPN NNP {'Number': 'Sing'}\n",
"started VERB VBD {'Tense': 'Past', 'VerbForm': 'Fin'}\n",
"working VERB VBG {'Aspect': 'Prog', 'Tense': 'Pres', 'VerbForm': 'Part'}\n",
"on ADP IN {}\n",
"self NOUN NN {'Number': 'Sing'}\n",
"- PUNCT HYPH {'PunctType': 'Dash'}\n",
"driving VERB VBG {'Aspect': 'Prog', 'Tense': 'Pres', 'VerbForm': 'Part'}\n",
"cars NOUN NNS {'Number': 'Plur'}\n",
"at ADP IN {}\n",
"Google PROPN NNP {'Number': 'Sing'}\n",
"in ADP IN {}\n",
"2007 NUM CD {'NumType': 'Card'}\n",
", PUNCT , {'PunctType': 'Comm'}\n",
"few ADJ JJ {'Degree': 'Pos'}\n",
"people NOUN NNS {'Number': 'Plur'}\n",
"outside ADP IN {}\n",
"of ADP IN {}\n",
"the DET DT {'Definite': 'Def', 'PronType': 'Art'}\n",
"company NOUN NN {'Number': 'Sing'}\n",
"took VERB VBD {'Tense': 'Past', 'VerbForm': 'Fin'}\n",
"him PRON PRP {'Case': 'Acc', 'Gender': 'Masc', 'Number': 'Sing', 'Person': '3', 'PronType': 'Prs'}\n",
"seriously ADV RB {}\n",
". PUNCT . {'PunctType': 'Peri'}\n",
"“ PUNCT `` {'PunctSide': 'Ini', 'PunctType': 'Quot'}\n",
"I PRON PRP {'Case': 'Nom', 'Number': 'Sing', 'Person': '1', 'PronType': 'Prs'}\n",
"can AUX MD {'VerbForm': 'Fin'}\n",
"tell VERB VB {'VerbForm': 'Inf'}\n",
"you PRON PRP {'Person': '2', 'PronType': 'Prs'}\n",
"very ADV RB {}\n",
"senior ADJ JJ {'Degree': 'Pos'}\n",
"CEOs NOUN NNS {'Number': 'Plur'}\n",
"of ADP IN {}\n",
"major ADJ JJ {'Degree': 'Pos'}\n",
"American ADJ JJ {'Degree': 'Pos'}\n",
"car NOUN NN {'Number': 'Sing'}\n",
"companies NOUN NNS {'Number': 'Plur'}\n",
"would AUX MD {'VerbForm': 'Fin'}\n",
"shake VERB VB {'VerbForm': 'Inf'}\n",
"my PRON PRP$ {'Number': 'Sing', 'Person': '1', 'Poss': 'Yes', 'PronType': 'Prs'}\n",
"hand NOUN NN {'Number': 'Sing'}\n",
"and CCONJ CC {'ConjType': 'Cmp'}\n",
"turn VERB VB {'VerbForm': 'Inf'}\n",
"away ADV RB {}\n",
"because SCONJ IN {}\n",
"I PRON PRP {'Case': 'Nom', 'Number': 'Sing', 'Person': '1', 'PronType': 'Prs'}\n",
"was AUX VBD {'Mood': 'Ind', 'Number': 'Sing', 'Person': '3', 'Tense': 'Past', 'VerbForm': 'Fin'}\n",
"n’t PART RB {'Polarity': 'Neg'}\n",
"worth ADJ JJ {'Degree': 'Pos'}\n",
"talking VERB VBG {'Aspect': 'Prog', 'Tense': 'Pres', 'VerbForm': 'Part'}\n",
"to ADP IN {}\n",
", PUNCT , {'PunctType': 'Comm'}\n",
"” PUNCT '' {'PunctSide': 'Fin', 'PunctType': 'Quot'}\n",
"said VERB VBD {'Tense': 'Past', 'VerbForm': 'Fin'}\n",
"Thrun PROPN NNP {'Number': 'Sing'}\n",
", PUNCT , {'PunctType': 'Comm'}\n",
"in ADP IN {}\n",
"an DET DT {'Definite': 'Ind', 'PronType': 'Art'}\n",
"interview NOUN NN {'Number': 'Sing'}\n",
"with ADP IN {}\n",
"Recode PROPN NNP {'Number': 'Sing'}\n",
"earlier ADV RBR {'Degree': 'Cmp'}\n",
"this DET DT {'Number': 'Sing', 'PronType': 'Dem'}\n",
"week NOUN NN {'Number': 'Sing'}\n",
". PUNCT . {'PunctType': 'Peri'}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# \"Number\" values of tokens 1 and 2; get() gives an empty list when the\n",
"# token has no such feature.\n",
"tuple(doc[i].morph.get(\"Number\") for i in (1, 2))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "duvX5ynZ8uET",
"outputId": "037076a3-5a31-4f6a-f0bb-6b802bf68d24"
},
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"([], ['Sing'])"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"# get() returns a list of values, so index into it to obtain the plain string.\n",
"number_values = doc[2].morph.get(\"Number\")\n",
"number_values[0]"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 35
},
"id": "yUrqLwDW84v_",
"outputId": "58d3c146-13ed-467a-8c74-aa67559a3f4e"
},
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'Sing'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "markdown",
"source": [
"## Japanese モデルの morph"
],
"metadata": {
"id": "V6EGccjY8CmO"
}
},
{
"cell_type": "code",
"source": [
"ja_nlp = spacy.load(\"ja_core_news_sm\")\n",
"\n",
"# Sample text taken from https://ja.wikipedia.org/wiki/SpaCy\n",
"ja_text = \"\".join([\n",
"    \"spaCyは高度な自然言語処理を行うためプログラミング言語\",\n",
"    \"PythonとCythonで書かれたオープンソースソフトウェア・ライブラリである。\",\n",
"    \"spaCyには、無料のオープンソースライブラリとして利用できるいくつかの拡張機能とビジュアライザが付属している。\",\n",
"])\n",
"ja_doc = ja_nlp(ja_text)\n",
"\n",
"# One row per token: surface text, coarse POS, fine-grained tag and the\n",
"# contents of token.morph as a plain dict.\n",
"for token in ja_doc:\n",
"    morph_features = token.morph.to_dict()\n",
"    print(token.text, token.pos_, token.tag_, morph_features)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wVlCjZMI97xO",
"outputId": "ae962c7f-e6ce-4360-a748-9a86486aef22"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"spaCy NOUN 名詞-普通名詞-一般 {'Reading': 'スペーシー'}\n",
"は ADP 助詞-係助詞 {'Reading': 'ハ'}\n",
"高度 ADJ 形状詞-一般 {'Reading': 'コウド'}\n",
"な AUX 助動詞 {'Inflection': '助動詞-ダ;連体形-一般', 'Reading': 'ナ'}\n",
"自然 NOUN 名詞-普通名詞-一般 {'Reading': 'シゼン'}\n",
"言語 NOUN 名詞-普通名詞-一般 {'Reading': 'ゲンゴ'}\n",
"処理 NOUN 名詞-普通名詞-サ変可能 {'Reading': 'ショリ'}\n",
"を ADP 助詞-格助詞 {'Reading': 'ヲ'}\n",
"行う VERB 動詞-一般 {'Inflection': '五段-ワア行;連体形-一般', 'Reading': 'オコナウ'}\n",
"ため NOUN 名詞-普通名詞-副詞可能 {'Reading': 'タメ'}\n",
"プログラミング NOUN 名詞-普通名詞-サ変可能 {'Reading': 'プログラミング'}\n",
"言語 NOUN 名詞-普通名詞-一般 {'Reading': 'ゲンゴ'}\n",
"Python NOUN 名詞-固有名詞-一般 {'Reading': 'パイソン'}\n",
"と ADP 助詞-格助詞 {'Reading': 'ト'}\n",
"Cython NOUN 名詞-普通名詞-一般 {'Reading': 'cython'}\n",
"で ADP 助詞-格助詞 {'Reading': 'デ'}\n",
"書か VERB 動詞-一般 {'Inflection': '五段-カ行;未然形-一般', 'Reading': 'カカ'}\n",
"れ AUX 助動詞 {'Inflection': '助動詞-レル;連用形-一般', 'Reading': 'レ'}\n",
"た AUX 助動詞 {'Inflection': '助動詞-タ;連体形-一般', 'Reading': 'タ'}\n",
"オープン NOUN 名詞-普通名詞-サ変形状詞可能 {'Reading': 'オープン'}\n",
"ソース NOUN 名詞-普通名詞-一般 {'Reading': 'ソース'}\n",
"ソフトウェア NOUN 名詞-普通名詞-一般 {'Reading': 'ソフトウェア'}\n",
"・ SYM 補助記号-一般 {'Reading': '・'}\n",
"ライブラリ NOUN 名詞-普通名詞-一般 {'Reading': 'ライブラリ'}\n",
"で AUX 助動詞 {'Inflection': '助動詞-ダ;連用形-一般', 'Reading': 'デ'}\n",
"ある VERB 動詞-非自立可能 {'Inflection': '五段-ラ行;終止形-一般', 'Reading': 'アル'}\n",
"。 PUNCT 補助記号-句点 {'Reading': '。'}\n",
"spaCy NOUN 名詞-普通名詞-一般 {'Reading': 'スペーシー'}\n",
"に ADP 助詞-格助詞 {'Reading': 'ニ'}\n",
"は ADP 助詞-係助詞 {'Reading': 'ハ'}\n",
"、 PUNCT 補助記号-読点 {'Reading': '、'}\n",
"無料 NOUN 名詞-普通名詞-一般 {'Reading': 'ムリョウ'}\n",
"の ADP 助詞-格助詞 {'Reading': 'ノ'}\n",
"オープン NOUN 名詞-普通名詞-サ変形状詞可能 {'Reading': 'オープン'}\n",
"ソース NOUN 名詞-普通名詞-一般 {'Reading': 'ソース'}\n",
"ライブラリ NOUN 名詞-普通名詞-一般 {'Reading': 'ライブラリ'}\n",
"と ADP 助詞-格助詞 {'Reading': 'ト'}\n",
"し AUX 動詞-非自立可能 {'Inflection': 'サ行変格;連用形-一般', 'Reading': 'シ'}\n",
"て SCONJ 助詞-接続助詞 {'Reading': 'テ'}\n",
"利用 VERB 名詞-普通名詞-サ変可能 {'Reading': 'リヨウ'}\n",
"できる AUX 動詞-非自立可能 {'Inflection': '上一段-カ行;連体形-一般', 'Reading': 'デキル'}\n",
"いく NUM 名詞-数詞 {'Reading': 'イク'}\n",
"つ NOUN 接尾辞-名詞的-助数詞 {'Reading': 'ツ'}\n",
"か ADP 助詞-副助詞 {'Reading': 'カ'}\n",
"の ADP 助詞-格助詞 {'Reading': 'ノ'}\n",
"拡張 NOUN 名詞-普通名詞-サ変可能 {'Reading': 'カクチョウ'}\n",
"機能 NOUN 名詞-普通名詞-サ変可能 {'Reading': 'キノウ'}\n",
"と ADP 助詞-格助詞 {'Reading': 'ト'}\n",
"ビジュアライザ NOUN 名詞-普通名詞-一般 {'Reading': 'ビジュアライザ'}\n",
"が ADP 助詞-格助詞 {'Reading': 'ガ'}\n",
"付属 VERB 名詞-普通名詞-サ変可能 {'Reading': 'フゾク'}\n",
"し AUX 動詞-非自立可能 {'Inflection': 'サ行変格;連用形-一般', 'Reading': 'シ'}\n",
"て SCONJ 助詞-接続助詞 {'Reading': 'テ'}\n",
"いる VERB 動詞-非自立可能 {'Inflection': '上一段-ア行;終止形-一般', 'Reading': 'イル'}\n",
"。 PUNCT 補助記号-句点 {'Reading': '。'}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Read the \"Inflection\" feature of token 3 and split it at \";\" into the\n",
"# conjugation type (ctype) and the conjugation form (cform).\n",
"inflection = ja_doc[3].morph.get(\"Inflection\")\n",
"print(inflection)\n",
"if inflection:\n",
"    # partition() always yields three parts, whereas unpacking split(\";\")\n",
"    # raises ValueError unless the value contains exactly one \";\".\n",
"    ctype, _sep, cform = inflection[0].partition(\";\")\n",
"    print(f\"ctype: {ctype}, cform: {cform}\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PSC8vsJxEhnl",
"outputId": "2002ae4b-7f3e-496c-fd64-5427e9c2cbb9"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"['助動詞-ダ;連体形-一般']\n",
"ctype: 助動詞-ダ, cform: 連体形-一般\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"### 参考: SudachiPy"
],
"metadata": {
"id": "yLEkZT_5IArx"
}
},
{
"cell_type": "code",
"source": [
"from sudachipy import tokenizer\n",
"from sudachipy import dictionary\n",
"\n",
"# Tokenize the same text with SudachiPy directly, for comparison with the\n",
"# spaCy output above. SplitMode.C is the mode create() uses when none is\n",
"# given; it is spelled out here so the segmentation granularity is visible.\n",
"split_mode = tokenizer.Tokenizer.SplitMode.C\n",
"tokenizer_obj = dictionary.Dictionary().create(mode=split_mode)\n",
"\n",
"# One row per morpheme: surface form, reading and the part-of-speech tuple.\n",
"for morpheme in tokenizer_obj.tokenize(ja_text):\n",
"    print(morpheme.surface(), morpheme.reading_form(), morpheme.part_of_speech())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1H2IZhDq_r7n",
"outputId": "aa070bf6-57ae-4680-b7cb-fb97955f9c01"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"spaCy スペーシー ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"は ハ ('助詞', '係助詞', '*', '*', '*', '*')\n",
"高度 コウド ('形状詞', '一般', '*', '*', '*', '*')\n",
"な ナ ('助動詞', '*', '*', '*', '助動詞-ダ', '連体形-一般')\n",
"自然言語処理 シゼンゲンゴショリ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"を ヲ ('助詞', '格助詞', '*', '*', '*', '*')\n",
"行う オコナウ ('動詞', '一般', '*', '*', '五段-ワア行', '連体形-一般')\n",
"ため タメ ('名詞', '普通名詞', '副詞可能', '*', '*', '*')\n",
"プログラミング プログラミング ('名詞', '普通名詞', 'サ変可能', '*', '*', '*')\n",
"言語 ゲンゴ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"Python パイソン ('名詞', '固有名詞', '一般', '*', '*', '*')\n",
"と ト ('助詞', '格助詞', '*', '*', '*', '*')\n",
"Cython cython ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"で デ ('助詞', '格助詞', '*', '*', '*', '*')\n",
"書か カカ ('動詞', '一般', '*', '*', '五段-カ行', '未然形-一般')\n",
"れ レ ('助動詞', '*', '*', '*', '助動詞-レル', '連用形-一般')\n",
"た タ ('助動詞', '*', '*', '*', '助動詞-タ', '連体形-一般')\n",
"オープンソースソフトウェア オープンソースソフトウェア ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"・ ・ ('補助記号', '一般', '*', '*', '*', '*')\n",
"ライブラリ ライブラリ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"で デ ('助動詞', '*', '*', '*', '助動詞-ダ', '連用形-一般')\n",
"ある アル ('動詞', '非自立可能', '*', '*', '五段-ラ行', '終止形-一般')\n",
"。 。 ('補助記号', '句点', '*', '*', '*', '*')\n",
"spaCy スペーシー ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"に ニ ('助詞', '格助詞', '*', '*', '*', '*')\n",
"は ハ ('助詞', '係助詞', '*', '*', '*', '*')\n",
"、 、 ('補助記号', '読点', '*', '*', '*', '*')\n",
"無料 ムリョウ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"の ノ ('助詞', '格助詞', '*', '*', '*', '*')\n",
"オープンソース オープンソース ('名詞', '固有名詞', '一般', '*', '*', '*')\n",
"ライブラリ ライブラリ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"と ト ('助詞', '格助詞', '*', '*', '*', '*')\n",
"し シ ('動詞', '非自立可能', '*', '*', 'サ行変格', '連用形-一般')\n",
"て テ ('助詞', '接続助詞', '*', '*', '*', '*')\n",
"利用 リヨウ ('名詞', '普通名詞', 'サ変可能', '*', '*', '*')\n",
"できる デキル ('動詞', '非自立可能', '*', '*', '上一段-カ行', '連体形-一般')\n",
"いくつ イクツ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"か カ ('助詞', '副助詞', '*', '*', '*', '*')\n",
"の ノ ('助詞', '格助詞', '*', '*', '*', '*')\n",
"拡張機能 カクチョウキノウ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"と ト ('助詞', '格助詞', '*', '*', '*', '*')\n",
"ビジュアライザ ビジュアライザ ('名詞', '普通名詞', '一般', '*', '*', '*')\n",
"が ガ ('助詞', '格助詞', '*', '*', '*', '*')\n",
"付属 フゾク ('名詞', '普通名詞', 'サ変可能', '*', '*', '*')\n",
"し シ ('動詞', '非自立可能', '*', '*', 'サ行変格', '連用形-一般')\n",
"て テ ('助詞', '接続助詞', '*', '*', '*', '*')\n",
"いる イル ('動詞', '非自立可能', '*', '*', '上一段-ア行', '終止形-一般')\n",
"。 。 ('補助記号', '句点', '*', '*', '*', '*')\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment