Created
November 30, 2020 13:01
-
-
Save nymwa/442bb597136c424f6601f241b155c605 to your computer and use it in GitHub Desktop.
ja_en_translation.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "ja_en_translation.ipynb", | |
| "provenance": [], | |
| "collapsed_sections": [], | |
| "authorship_tag": "ABX9TyPIk4t1doaHuYh06ZHowCko", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/nymwa/442bb597136c424f6601f241b155c605/ja_en_translation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "tl0RxhcR76aK" | |
| }, | |
| "source": [ | |
| "GPUが使えるか確認します (使えない場合は、メニューの「ランタイム」→「ランタイムのタイプを変更」でハードウェア アクセラレータをGPUにしてください)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "ObtxRtwJ72Ah", | |
| "outputId": "d8f35d1c-54c5-4145-afcb-4debbcc957d3" | |
| }, | |
| "source": [ | |
| "! nvidia-smi" | |
| ], | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "Mon Nov 30 11:35:57 2020 \n", | |
| "+-----------------------------------------------------------------------------+\n", | |
| "| NVIDIA-SMI 455.38 Driver Version: 418.67 CUDA Version: 10.1 |\n", | |
| "|-------------------------------+----------------------+----------------------+\n", | |
| "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
| "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", | |
| "| | | MIG M. |\n", | |
| "|===============================+======================+======================|\n", | |
| "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", | |
| "| N/A 66C P8 11W / 70W | 0MiB / 15079MiB | 0% Default |\n", | |
| "| | | ERR! |\n", | |
| "+-------------------------------+----------------------+----------------------+\n", | |
| " \n", | |
| "+-----------------------------------------------------------------------------+\n", | |
| "| Processes: |\n", | |
| "| GPU GI CI PID Type Process name GPU Memory |\n", | |
| "| ID ID Usage |\n", | |
| "|=============================================================================|\n", | |
| "| No running processes found |\n", | |
| "+-----------------------------------------------------------------------------+\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "LLDgeH9v11Vv" | |
| }, | |
| "source": [ | |
| "データの準備" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "p2dOZe9A1r9l", | |
| "outputId": "0e87b3c1-77fe-4242-8089-f34f4c8e7b0d" | |
| }, | |
| "source": [ | |
| "# Download and unpack the KFTT data archive. -nc skips the download when the\n", | |
| "# archive is already present, so re-running this cell does not fetch a duplicate copy.\n", | |
| "! wget -nc http://www.phontron.com/kftt/download/kftt-data-1.0.tar.gz\n", | |
| "! tar zxvf kftt-data-1.0.tar.gz" | |
| ], | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "--2020-11-30 11:35:59-- http://www.phontron.com/kftt/download/kftt-data-1.0.tar.gz\n", | |
| "Resolving www.phontron.com (www.phontron.com)... 208.113.196.149\n", | |
| "Connecting to www.phontron.com (www.phontron.com)|208.113.196.149|:80... connected.\n", | |
| "HTTP request sent, awaiting response... 200 OK\n", | |
| "Length: 99246893 (95M) [application/gzip]\n", | |
| "Saving to: ‘kftt-data-1.0.tar.gz’\n", | |
| "\n", | |
| "kftt-data-1.0.tar.g 100%[===================>] 94.65M 107MB/s in 0.9s \n", | |
| "\n", | |
| "2020-11-30 11:36:00 (107 MB/s) - ‘kftt-data-1.0.tar.gz’ saved [99246893/99246893]\n", | |
| "\n", | |
| "kftt-data-1.0/\n", | |
| "kftt-data-1.0/data/\n", | |
| "kftt-data-1.0/data/orig/\n", | |
| "kftt-data-1.0/data/orig/kyoto-tune.en\n", | |
| "kftt-data-1.0/data/orig/kyoto-dev.ja\n", | |
| "kftt-data-1.0/data/orig/kyoto-dev.en\n", | |
| "kftt-data-1.0/data/orig/kyoto-train.en\n", | |
| "kftt-data-1.0/data/orig/kyoto-tune.ja\n", | |
| "kftt-data-1.0/data/orig/kyoto-train.ja\n", | |
| "kftt-data-1.0/data/orig/kyoto-test.ja\n", | |
| "kftt-data-1.0/data/orig/kyoto-test.en\n", | |
| "kftt-data-1.0/data/tok/\n", | |
| "kftt-data-1.0/data/tok/kyoto-tune.en\n", | |
| "kftt-data-1.0/data/tok/kyoto-dev.ja\n", | |
| "kftt-data-1.0/data/tok/kyoto-train.cln.en\n", | |
| "kftt-data-1.0/data/tok/kyoto-dev.en\n", | |
| "kftt-data-1.0/data/tok/kyoto-train.en\n", | |
| "kftt-data-1.0/data/tok/kyoto-tune.ja\n", | |
| "kftt-data-1.0/data/tok/kyoto-train.cln.ja\n", | |
| "kftt-data-1.0/data/tok/kyoto-train.ja\n", | |
| "kftt-data-1.0/data/tok/kyoto-test.ja\n", | |
| "kftt-data-1.0/data/tok/kyoto-test.en\n", | |
| "kftt-data-1.0/README.txt\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "Y1xelkzu3Kjt" | |
| }, | |
| "source": [ | |
| "機械翻訳ライブラリのfairseqをインストールします" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "tsloYngM3HI-", | |
| "outputId": "6f193fd3-dfba-44bf-e9f0-e39875dd43bb" | |
| }, | |
| "source": [ | |
| "# Pinned to the release this notebook was executed with (see the recorded install log\n", | |
| "# of this cell); %pip installs into the environment of the running kernel.\n", | |
| "%pip install fairseq==0.10.1" | |
| ], | |
| "execution_count": 4, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "Collecting fairseq\n", | |
| "\u001b[?25l Downloading https://files.pythonhosted.org/packages/2c/da/7c7032988dade3b21ccfd5b226e50b382abfd3459129d67240bb004506ae/fairseq-0.10.1-cp36-cp36m-manylinux1_x86_64.whl (1.7MB)\n", | |
| "\u001b[K |████████████████████████████████| 1.7MB 16.3MB/s \n", | |
| "\u001b[?25hCollecting sacrebleu>=1.4.12\n", | |
| "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a3/c4/8e948f601a4f9609e8b2b58f31966cb13cf17b940b82aa3e767f01c42c52/sacrebleu-1.4.14-py3-none-any.whl (64kB)\n", | |
| "\u001b[K |████████████████████████████████| 71kB 10.2MB/s \n", | |
| "\u001b[?25hCollecting hydra-core\n", | |
| "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f0/1f/7f502b9e37596164111655861370b08626f46f9e4524433c354f472765d4/hydra_core-1.0.4-py3-none-any.whl (122kB)\n", | |
| "\u001b[K |████████████████████████████████| 122kB 63.6MB/s \n", | |
| "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from fairseq) (4.41.1)\n", | |
| "Requirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (from fairseq) (0.29.21)\n", | |
| "Requirement already satisfied: cffi in /usr/local/lib/python3.6/dist-packages (from fairseq) (1.14.3)\n", | |
| "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from fairseq) (1.18.5)\n", | |
| "Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (from fairseq) (2019.12.20)\n", | |
| "Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (from fairseq) (1.7.0+cu101)\n", | |
| "Requirement already satisfied: dataclasses in /usr/local/lib/python3.6/dist-packages (from fairseq) (0.8)\n", | |
| "Collecting portalocker\n", | |
| " Downloading https://files.pythonhosted.org/packages/89/a6/3814b7107e0788040870e8825eebf214d72166adf656ba7d4bf14759a06a/portalocker-2.0.0-py2.py3-none-any.whl\n", | |
| "Collecting omegaconf>=2.0.5\n", | |
| " Downloading https://files.pythonhosted.org/packages/e5/f6/043b6d255dd6fbf2025110cea35b87f4c5100a181681d8eab496269f0d5b/omegaconf-2.0.5-py3-none-any.whl\n", | |
| "Collecting antlr4-python3-runtime==4.8\n", | |
| "\u001b[?25l Downloading https://files.pythonhosted.org/packages/56/02/789a0bddf9c9b31b14c3e79ec22b9656185a803dc31c15f006f9855ece0d/antlr4-python3-runtime-4.8.tar.gz (112kB)\n", | |
| "\u001b[K |████████████████████████████████| 112kB 61.8MB/s \n", | |
| "\u001b[?25hRequirement already satisfied: importlib-resources; python_version < \"3.9\" in /usr/local/lib/python3.6/dist-packages (from hydra-core->fairseq) (3.3.0)\n", | |
| "Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi->fairseq) (2.20)\n", | |
| "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.6/dist-packages (from torch->fairseq) (3.7.4.3)\n", | |
| "Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from torch->fairseq) (0.16.0)\n", | |
| "Collecting PyYAML>=5.1.*\n", | |
| "\u001b[?25l Downloading https://files.pythonhosted.org/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)\n", | |
| "\u001b[K |████████████████████████████████| 276kB 51.4MB/s \n", | |
| "\u001b[?25hRequirement already satisfied: zipp>=0.4; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from importlib-resources; python_version < \"3.9\"->hydra-core->fairseq) (3.4.0)\n", | |
| "Building wheels for collected packages: antlr4-python3-runtime, PyYAML\n", | |
| " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.8-cp36-none-any.whl size=141230 sha256=b2d6bd73c0631a536a1599059d7b58ba8066c95567901da472d2d84819e7d9bc\n", | |
| " Stored in directory: /root/.cache/pip/wheels/e3/e2/fa/b78480b448b8579ddf393bebd3f47ee23aa84c89b6a78285c8\n", | |
| " Building wheel for PyYAML (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
| " Created wheel for PyYAML: filename=PyYAML-5.3.1-cp36-cp36m-linux_x86_64.whl size=44619 sha256=171c058bc1eeb35e2d0ae2362bdfb1744ffa5f5aa7680e3f6d9be42203537e32\n", | |
| " Stored in directory: /root/.cache/pip/wheels/a7/c1/ea/cf5bd31012e735dc1dfea3131a2d5eae7978b251083d6247bd\n", | |
| "Successfully built antlr4-python3-runtime PyYAML\n", | |
| "Installing collected packages: portalocker, sacrebleu, PyYAML, omegaconf, antlr4-python3-runtime, hydra-core, fairseq\n", | |
| " Found existing installation: PyYAML 3.13\n", | |
| " Uninstalling PyYAML-3.13:\n", | |
| " Successfully uninstalled PyYAML-3.13\n", | |
| "Successfully installed PyYAML-5.3.1 antlr4-python3-runtime-4.8 fairseq-0.10.1 hydra-core-1.0.4 omegaconf-2.0.5 portalocker-2.0.0 sacrebleu-1.4.14\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "P8hAnVcd3RxP" | |
| }, | |
| "source": [ | |
| "文をトークン単位に分割するライブラリのsentencepieceをインストールします" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "V5rTl8BS3Z1L", | |
| "outputId": "f3f8aa1a-910e-48d9-bd2b-7e2a2d63c6a0" | |
| }, | |
| "source": [ | |
| "# Pinned to the release this notebook was executed with (see the recorded install log\n", | |
| "# of this cell); %pip installs into the environment of the running kernel.\n", | |
| "%pip install sentencepiece==0.1.94" | |
| ], | |
| "execution_count": 5, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "Collecting sentencepiece\n", | |
| "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e5/2d/6d4ca4bef9a67070fa1cac508606328329152b1df10bdf31fb6e4e727894/sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1MB)\n", | |
| "\r\u001b[K |▎ | 10kB 24.9MB/s eta 0:00:01\r\u001b[K |▋ | 20kB 17.4MB/s eta 0:00:01\r\u001b[K |▉ | 30kB 15.4MB/s eta 0:00:01\r\u001b[K |█▏ | 40kB 14.8MB/s eta 0:00:01\r\u001b[K |█▌ | 51kB 12.0MB/s eta 0:00:01\r\u001b[K |█▊ | 61kB 12.0MB/s eta 0:00:01\r\u001b[K |██ | 71kB 11.8MB/s eta 0:00:01\r\u001b[K |██▍ | 81kB 12.1MB/s eta 0:00:01\r\u001b[K |██▋ | 92kB 11.9MB/s eta 0:00:01\r\u001b[K |███ | 102kB 11.8MB/s eta 0:00:01\r\u001b[K |███▎ | 112kB 11.8MB/s eta 0:00:01\r\u001b[K |███▌ | 122kB 11.8MB/s eta 0:00:01\r\u001b[K |███▉ | 133kB 11.8MB/s eta 0:00:01\r\u001b[K |████▏ | 143kB 11.8MB/s eta 0:00:01\r\u001b[K |████▍ | 153kB 11.8MB/s eta 0:00:01\r\u001b[K |████▊ | 163kB 11.8MB/s eta 0:00:01\r\u001b[K |█████ | 174kB 11.8MB/s eta 0:00:01\r\u001b[K |█████▎ | 184kB 11.8MB/s eta 0:00:01\r\u001b[K |█████▋ | 194kB 11.8MB/s eta 0:00:01\r\u001b[K |█████▉ | 204kB 11.8MB/s eta 0:00:01\r\u001b[K |██████▏ | 215kB 11.8MB/s eta 0:00:01\r\u001b[K |██████▌ | 225kB 11.8MB/s eta 0:00:01\r\u001b[K |██████▊ | 235kB 11.8MB/s eta 0:00:01\r\u001b[K |███████ | 245kB 11.8MB/s eta 0:00:01\r\u001b[K |███████▍ | 256kB 11.8MB/s eta 0:00:01\r\u001b[K |███████▋ | 266kB 11.8MB/s eta 0:00:01\r\u001b[K |████████ | 276kB 11.8MB/s eta 0:00:01\r\u001b[K |████████▎ | 286kB 11.8MB/s eta 0:00:01\r\u001b[K |████████▌ | 296kB 11.8MB/s eta 0:00:01\r\u001b[K |████████▉ | 307kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████ | 317kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████▍ | 327kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████▊ | 337kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████ | 348kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████▎ | 358kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████▋ | 368kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████▉ | 378kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████▏ | 389kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████▌ | 399kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████▊ | 409kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████ | 419kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████▍ | 430kB 11.8MB/s eta 
0:00:01\r\u001b[K |████████████▋ | 440kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████ | 450kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████▎ | 460kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████▌ | 471kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 481kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████ | 491kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████▍ | 501kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████▊ | 512kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████ | 522kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████▎ | 532kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████▋ | 542kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████▉ | 552kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████▏ | 563kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████▌ | 573kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████▊ | 583kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████ | 593kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████▍ | 604kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████▋ | 614kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████ | 624kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████▏ | 634kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████▌ | 645kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████▉ | 655kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████ | 665kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████▍ | 675kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████▊ | 686kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████ | 696kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████▎ | 706kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████▋ | 716kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████▉ | 727kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████▏ | 737kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████▌ | 747kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████▊ | 757kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████ | 768kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████▎ | 778kB 11.8MB/s 
eta 0:00:01\r\u001b[K |██████████████████████▋ | 788kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████ | 798kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████▏ | 808kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████▌ | 819kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████▉ | 829kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████ | 839kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████▍ | 849kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████▊ | 860kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████ | 870kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████▎ | 880kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████▋ | 890kB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 901kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████▏ | 911kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████▌ | 921kB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████▊ | 931kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████ | 942kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████▎ | 952kB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 962kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████ | 972kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████▏ | 983kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████▌ | 993kB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████▉ | 1.0MB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████████ | 1.0MB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▍ | 1.0MB 11.8MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▊ | 1.0MB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████████ | 1.0MB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 1.1MB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▋ | 1.1MB 11.8MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▉ | 1.1MB 11.8MB/s eta 0:00:01\r\u001b[K 
|███████████████████████████████▏| 1.1MB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▍| 1.1MB 11.8MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▊| 1.1MB 11.8MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 1.1MB 11.8MB/s \n", | |
| "\u001b[?25hInstalling collected packages: sentencepiece\n", | |
| "Successfully installed sentencepiece-0.1.94\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "Pelp3rMs31oA", | |
| "outputId": "8d4bcd44-0ebb-4be7-8fb8-721e9e9c0c5a" | |
| }, | |
| "source": [ | |
| "! ls kftt-data-1.0/data/orig/" | |
| ], | |
| "execution_count": 6, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "kyoto-dev.en kyoto-test.en kyoto-train.en kyoto-tune.en\n", | |
| "kyoto-dev.ja kyoto-test.ja kyoto-train.ja kyoto-tune.ja\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "fl7gOAoK5yim", | |
| "outputId": "2c510ea5-a873-4b97-9bc6-627bea710855" | |
| }, | |
| "source": [ | |
| "! ls" | |
| ], | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "kftt-data-1.0 kftt-data-1.0.tar.gz sample_data\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "3DnNfdZr3Z3l" | |
| }, | |
| "source": [ | |
| "import sentencepiece as spm\n", | |
| "\n", | |
| "spm.SentencePieceTrainer.Train('--input=kftt-data-1.0/data/orig/kyoto-train.ja --model_prefix=kyoto_ja --vocab_size=8000 --model_type=bpe')" | |
| ], | |
| "execution_count": 8, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "dELyjfw03Z52" | |
| }, | |
| "source": [ | |
| "import re\n", | |
| "\n", | |
| "sp = spm.SentencePieceProcessor()\n", | |
| "sp.Load('kyoto_ja.model')\n", | |
| "\n", | |
| "src_list = ['kftt-data-1.0/data/orig/kyoto-train.ja', 'kftt-data-1.0/data/orig/kyoto-dev.ja', 'kftt-data-1.0/data/orig/kyoto-test.ja']\n", | |
| "dst_list = ['train.sub.ja', 'dev.sub.ja', 'test.sub.ja']\n", | |
| "for src, dst in zip(src_list, dst_list):\n", | |
| " with open(src) as f, open(dst, 'w') as g:\n", | |
| " for x in f:\n", | |
| " x = x.strip()\n", | |
| " x = re.sub(r'\\s+', ' ', x)\n", | |
| " x = sp.encode_as_pieces(x)\n", | |
| " x = ' '.join(x)\n", | |
| " print(x, file=g)" | |
| ], | |
| "execution_count": 12, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "lQYGEO4q3Z9V" | |
| }, | |
| "source": [ | |
| "spm.SentencePieceTrainer.Train('--input=kftt-data-1.0/data/orig/kyoto-train.en --model_prefix=kyoto_en --vocab_size=8000 --model_type=bpe')" | |
| ], | |
| "execution_count": 19, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "yxdx9YtC2J-4" | |
| }, | |
| "source": [ | |
| "sp.Load('kyoto_en.model')\n", | |
| "\n", | |
| "src_list = ['kftt-data-1.0/data/orig/kyoto-train.en', 'kftt-data-1.0/data/orig/kyoto-dev.en', 'kftt-data-1.0/data/orig/kyoto-test.en']\n", | |
| "dst_list = ['train.sub.en', 'dev.sub.en', 'test.sub.en']\n", | |
| "for src, dst in zip(src_list, dst_list):\n", | |
| " with open(src) as f, open(dst, 'w') as g:\n", | |
| " for x in f:\n", | |
| " x = x.strip()\n", | |
| " x = re.sub(r'\\s+', ' ', x)\n", | |
| " x = sp.encode_as_pieces(x)\n", | |
| " x = ' '.join(x)\n", | |
| " print(x, file=g)" | |
| ], | |
| "execution_count": 25, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "hGxpb4-CMGq0" | |
| }, | |
| "source": [ | |
| "x = '''\n", | |
| "import sys\n", | |
| "import sentencepiece as spm\n", | |
| "sp = spm.SentencePieceProcessor()\n", | |
| "sp.Load('kyoto_ja.model')\n", | |
| "for x in sys.stdin:\n", | |
| " x = x.strip()\n", | |
| " x = sp.encode_as_pieces(x)\n", | |
| " x = ' '.join(x)\n", | |
| " print(x)\n", | |
| "'''\n", | |
| "with open('spm.py', 'w') as f:\n", | |
| " f.write(x)" | |
| ], | |
| "execution_count": 53, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "neN7fRDq_mkD" | |
| }, | |
| "source": [ | |
| "データの前処理" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "AjwaPPLX_F06", | |
| "outputId": "086bf434-7b56-42e5-8d04-ffae5232ee1d" | |
| }, | |
| "source": [ | |
| "# Build the ja/en dictionaries (capped at 8000 entries each) and write the preprocessed\n", | |
| "# train/dev data for fairseq into ./data.\n", | |
| "! fairseq-preprocess \\\n", | |
| "    --source-lang ja --target-lang en \\\n", | |
| "    --trainpref train.sub --validpref dev.sub \\\n", | |
| "    --nwordssrc 8000 --nwordstgt 8000 \\\n", | |
| "    --destdir data \\\n", | |
| "    --workers 2" | |
| ], | |
| "execution_count": 35, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "2020-11-30 12:05:57 | INFO | fairseq_cli.preprocess | Namespace(align_suffix=None, alignfile=None, all_gather_list_size=16384, bf16=False, bpe=None, checkpoint_shard_count=1, checkpoint_suffix='', cpu=False, criterion='cross_entropy', dataset_impl='mmap', destdir='data', empty_cache_freq=0, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, joined_dictionary=False, log_format=None, log_interval=100, lr_scheduler='fixed', memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, model_parallel_size=1, no_progress_bar=False, nwordssrc=8000, nwordstgt=8000, only_source=False, optimizer=None, padding_factor=8, profile=False, quantization_config_path=None, scoring='bleu', seed=1, source_lang='ja', srcdict=None, target_lang='en', task='translation', tensorboard_logdir=None, testpref=None, tgtdict=None, threshold_loss_scale=None, thresholdsrc=0, thresholdtgt=0, tokenizer=None, tpu=False, trainpref='train.sub', user_dir=None, validpref='dev.sub', workers=2)\n", | |
| "2020-11-30 12:07:37 | INFO | fairseq_cli.preprocess | [ja] Dictionary: 8000 types\n", | |
| "2020-11-30 12:09:08 | INFO | fairseq_cli.preprocess | [ja] train.sub.ja: 440288 sents, 11698669 tokens, 0.0672% replaced by <unk>\n", | |
| "2020-11-30 12:09:08 | INFO | fairseq_cli.preprocess | [ja] Dictionary: 8000 types\n", | |
| "2020-11-30 12:09:08 | INFO | fairseq_cli.preprocess | [ja] dev.sub.ja: 1166 sents, 27315 tokens, 0.0476% replaced by <unk>\n", | |
| "2020-11-30 12:09:08 | INFO | fairseq_cli.preprocess | [en] Dictionary: 8000 types\n", | |
| "2020-11-30 12:10:55 | INFO | fairseq_cli.preprocess | [en] train.sub.en: 440288 sents, 15295202 tokens, 0.108% replaced by <unk>\n", | |
| "2020-11-30 12:10:55 | INFO | fairseq_cli.preprocess | [en] Dictionary: 8000 types\n", | |
| "2020-11-30 12:10:55 | INFO | fairseq_cli.preprocess | [en] dev.sub.en: 1166 sents, 33434 tokens, 0.144% replaced by <unk>\n", | |
| "2020-11-30 12:10:55 | INFO | fairseq_cli.preprocess | Wrote preprocessed data to data\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "7lsXHu6PEFKU" | |
| }, | |
| "source": [ | |
| "訓練する" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "gizvJnXPBMXm", | |
| "outputId": "bc45f6bb-4979-410e-b726-a085f5e7c8d9" | |
| }, | |
| "source": [ | |
| "# Train the fconv_iwslt_de_en architecture on the preprocessed ja-en data for 10 epochs;\n", | |
| "# checkpoints are written to ./save.\n", | |
| "# NOTE(review): --force-anneal 50 is larger than --max-epoch 10, so it presumably never\n", | |
| "# takes effect in this run - confirm against the fairseq 'fixed' scheduler docs.\n", | |
| "! fairseq-train data \\\n", | |
| "    --save-dir save \\\n", | |
| "    --arch fconv_iwslt_de_en \\\n", | |
| "    --max-epoch 10 \\\n", | |
| "    --max-tokens 8000 \\\n", | |
| "    --fp16 \\\n", | |
| "    --dropout 0.2 \\\n", | |
| "    --criterion label_smoothed_cross_entropy \\\n", | |
| "    --label-smoothing 0.1 \\\n", | |
| "    --optimizer nag \\\n", | |
| "    --clip-norm 0.1 \\\n", | |
| "    --lr 0.5 \\\n", | |
| "    --lr-scheduler fixed \\\n", | |
| "    --force-anneal 50" | |
| ], | |
| "execution_count": 41, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "2020-11-30 12:24:12 | INFO | fairseq_cli.train | Namespace(all_gather_list_size=16384, arch='fconv_iwslt_de_en', batch_size=None, batch_size_valid=None, best_checkpoint_metric='loss', bf16=False, bpe=None, broadcast_buffers=False, bucket_cap_mb=25, checkpoint_shard_count=1, checkpoint_suffix='', clip_norm=0.1, cpu=False, criterion='label_smoothed_cross_entropy', curriculum=0, data='data', data_buffer_size=10, dataset_impl=None, ddp_backend='c10d', decoder_attention='True', decoder_embed_dim=256, decoder_embed_path=None, decoder_layers='[(256, 3)] * 3', decoder_out_embed_dim=256, device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_num_procs=1, distributed_port=-1, distributed_rank=0, distributed_world_size=1, distributed_wrapper='DDP', dropout=0.2, empty_cache_freq=0, encoder_embed_dim=256, encoder_embed_path=None, encoder_layers='[(256, 3)] * 4', eval_bleu=False, eval_bleu_args=None, eval_bleu_detok='space', eval_bleu_detok_args=None, eval_bleu_print_samples=False, eval_bleu_remove_bpe=None, eval_tokenized_bleu=False, fast_stat_sync=False, find_unused_parameters=False, finetune_from_model=None, fix_batches_to_gpus=False, fixed_validation_seed=None, force_anneal=50, fp16=True, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, gen_subset='test', ignore_prefix_size=0, keep_best_checkpoints=-1, keep_interval_updates=-1, keep_last_epochs=-1, label_smoothing=0.1, left_pad_source='True', left_pad_target='False', load_alignments=False, localsgd_frequency=3, log_format=None, log_interval=100, lr=[0.5], lr_scheduler='fixed', lr_shrink=0.1, max_epoch=10, max_source_positions=1024, max_target_positions=1024, max_tokens=8000, max_tokens_valid=8000, max_update=0, maximize_best_checkpoint_metric=False, memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=-1.0, model_parallel_size=1, momentum=0.99, 
no_epoch_checkpoints=False, no_last_checkpoints=False, no_progress_bar=False, no_save=False, no_save_optimizer_state=False, no_seed_provided=False, nprocs_per_node=1, num_batch_buckets=0, num_shards=1, num_workers=1, optimizer='nag', optimizer_overrides='{}', patience=-1, pipeline_balance=None, pipeline_checkpoint='never', pipeline_chunks=0, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_devices=None, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_model_parallel=False, profile=False, quantization_config_path=None, report_accuracy=False, required_batch_size_multiple=8, required_seq_len_multiple=1, reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, restore_file='checkpoint_last.pt', save_dir='save', save_interval=1, save_interval_updates=0, scoring='bleu', seed=1, sentence_avg=False, shard_id=0, share_input_output_embed=False, skip_invalid_size_inputs_valid_test=False, slowmo_algorithm='LocalSGD', slowmo_momentum=None, source_lang=None, stop_time_hours=0, target_lang=None, task='translation', tensorboard_logdir=None, threshold_loss_scale=None, tokenizer=None, tpu=False, train_subset='train', truncate_source=False, update_freq=[1], upsample_primary=1, use_bmuf=False, user_dir=None, valid_subset='valid', validate_after_updates=0, validate_interval=1, validate_interval_updates=0, warmup_updates=0, weight_decay=0.0, zero_sharding='none')\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq.tasks.translation | [ja] dictionary: 8000 types\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq.tasks.translation | [en] dictionary: 8000 types\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq.data.data_utils | loaded 1166 examples from: data/valid.ja-en.ja\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq.data.data_utils | loaded 1166 examples from: data/valid.ja-en.en\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq.tasks.translation | data valid ja-en 1166 examples\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq_cli.train | FConvModel(\n", | |
| " (encoder): FConvEncoder(\n", | |
| " (dropout_module): FairseqDropout()\n", | |
| " (embed_tokens): Embedding(8000, 256, padding_idx=1)\n", | |
| " (embed_positions): LearnedPositionalEmbedding(1024, 256, padding_idx=1)\n", | |
| " (fc1): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " (projections): ModuleList(\n", | |
| " (0): None\n", | |
| " (1): None\n", | |
| " (2): None\n", | |
| " (3): None\n", | |
| " )\n", | |
| " (convolutions): ModuleList(\n", | |
| " (0): ConvTBC(256, 512, kernel_size=(3,), padding=(1,))\n", | |
| " (1): ConvTBC(256, 512, kernel_size=(3,), padding=(1,))\n", | |
| " (2): ConvTBC(256, 512, kernel_size=(3,), padding=(1,))\n", | |
| " (3): ConvTBC(256, 512, kernel_size=(3,), padding=(1,))\n", | |
| " )\n", | |
| " (fc2): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " )\n", | |
| " (decoder): FConvDecoder(\n", | |
| " (dropout_module): FairseqDropout()\n", | |
| " (embed_tokens): Embedding(8000, 256, padding_idx=1)\n", | |
| " (embed_positions): LearnedPositionalEmbedding(1024, 256, padding_idx=1)\n", | |
| " (fc1): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " (projections): ModuleList(\n", | |
| " (0): None\n", | |
| " (1): None\n", | |
| " (2): None\n", | |
| " )\n", | |
| " (convolutions): ModuleList(\n", | |
| " (0): LinearizedConvolution(256, 512, kernel_size=(3,), padding=(2,))\n", | |
| " (1): LinearizedConvolution(256, 512, kernel_size=(3,), padding=(2,))\n", | |
| " (2): LinearizedConvolution(256, 512, kernel_size=(3,), padding=(2,))\n", | |
| " )\n", | |
| " (attention): ModuleList(\n", | |
| " (0): AttentionLayer(\n", | |
| " (in_projection): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " (out_projection): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " )\n", | |
| " (1): AttentionLayer(\n", | |
| " (in_projection): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " (out_projection): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " )\n", | |
| " (2): AttentionLayer(\n", | |
| " (in_projection): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " (out_projection): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " )\n", | |
| " )\n", | |
| " (fc2): Linear(in_features=256, out_features=256, bias=True)\n", | |
| " (fc3): Linear(in_features=256, out_features=8000, bias=True)\n", | |
| " )\n", | |
| ")\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq_cli.train | task: translation (TranslationTask)\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq_cli.train | model: fconv_iwslt_de_en (FConvModel)\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq_cli.train | criterion: label_smoothed_cross_entropy (LabelSmoothedCrossEntropyCriterion)\n", | |
| "2020-11-30 12:24:12 | INFO | fairseq_cli.train | num. model params: 10104448 (num. trained: 10104448)\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.utils | ***********************CUDA enviroments for all 1 workers***********************\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.utils | rank 0: capabilities = 7.5 ; total memory = 14.726 GB ; name = Tesla T4 \n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.utils | ***********************CUDA enviroments for all 1 workers***********************\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq_cli.train | training on 1 devices (GPUs/TPUs)\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq_cli.train | max tokens per GPU = 8000 and max sentences per GPU = None\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.trainer | no existing checkpoint found save/checkpoint_last.pt\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.trainer | loading train data for epoch 1\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.data.data_utils | loaded 440288 examples from: data/train.ja-en.ja\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.data.data_utils | loaded 440288 examples from: data/train.ja-en.en\n", | |
| "2020-11-30 12:24:16 | INFO | fairseq.tasks.translation | data train ja-en 440288 examples\n", | |
| "epoch 001: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:24:17 | INFO | fairseq.trainer | begin training epoch 1\n", | |
| "epoch 001: 0% 3/2151 [00:00<04:19, 8.29it/s]2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 64.0\n", | |
| "epoch 001: 0% 9/2151 [00:00<02:54, 12.29it/s]2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 32.0\n", | |
| "2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 16.0\n", | |
| "2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 8.0\n", | |
| "epoch 001: 1% 12/2151 [00:00<02:27, 14.45it/s]2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 4.0\n", | |
| "2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 2.0\n", | |
| "epoch 001: 1% 15/2151 [00:00<02:10, 16.34it/s]2020-11-30 12:24:17 | INFO | fairseq.trainer | NOTE: overflow detected, setting loss scale to: 1.0\n", | |
| "epoch 001: 100% 2148/2151 [01:35<00:00, 23.79it/s, loss=7.252, nll_loss=6.233, ppl=75.24, wps=156348, ups=22.35, wpb=6994.5, bsz=199.6, num_updates=2100, lr=0.5, gnorm=0.201, clip=100, loss_scale=1, train_wall=4, wall=95]2020-11-30 12:25:53 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 001 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 001 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 15.77it/s]\u001b[A\n", | |
| "epoch 001 | valid on 'valid' subset: 67% 6/9 [00:00<00:00, 18.94it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:25:53 | INFO | valid | epoch 001 | valid on 'valid' subset | loss 7.126 | nll_loss 6.106 | ppl 68.89 | wps 151304 | wpb 3714.9 | bsz 129.6 | num_updates 2144\n", | |
| "2020-11-30 12:25:53 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:25:53 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint1.pt (epoch 1 @ 2144 updates, score 7.126) (writing took 0.46832819700011896 seconds)\n", | |
| "2020-11-30 12:25:53 | INFO | fairseq_cli.train | end of epoch 1 (average epoch stats below)\n", | |
| "2020-11-30 12:25:53 | INFO | train | epoch 001 | loss 8.28 | nll_loss 7.449 | ppl 174.75 | wps 157611 | ups 22.17 | wpb 7108.8 | bsz 204.5 | num_updates 2144 | lr 0.5 | gnorm 0.295 | clip 100 | loss_scale 1 | train_wall 94 | wall 97\n", | |
| "epoch 002: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:25:54 | INFO | fairseq.trainer | begin training epoch 2\n", | |
| "epoch 002: 100% 2149/2151 [01:37<00:00, 22.59it/s, loss=6.501, nll_loss=5.354, ppl=40.89, wps=154007, ups=21.32, wpb=7223.8, bsz=245.1, num_updates=4200, lr=0.5, gnorm=0.204, clip=100, loss_scale=1, train_wall=5, wall=191]2020-11-30 12:27:31 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 002 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 002 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 16.20it/s]\u001b[A\n", | |
| "epoch 002 | valid on 'valid' subset: 67% 6/9 [00:00<00:00, 19.47it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:27:32 | INFO | valid | epoch 002 | valid on 'valid' subset | loss 6.384 | nll_loss 5.233 | ppl 37.61 | wps 151533 | wpb 3714.9 | bsz 129.6 | num_updates 4295 | best_loss 6.384\n", | |
| "2020-11-30 12:27:32 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:27:32 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint2.pt (epoch 2 @ 4295 updates, score 6.384) (writing took 0.457812881999871 seconds)\n", | |
| "2020-11-30 12:27:32 | INFO | fairseq_cli.train | end of epoch 2 (average epoch stats below)\n", | |
| "2020-11-30 12:27:32 | INFO | train | epoch 002 | loss 6.807 | nll_loss 5.709 | ppl 52.3 | wps 154786 | ups 21.77 | wpb 7110.7 | bsz 204.7 | num_updates 4295 | lr 0.5 | gnorm 0.201 | clip 100 | loss_scale 1 | train_wall 96 | wall 196\n", | |
| "epoch 003: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:27:32 | INFO | fairseq.trainer | begin training epoch 3\n", | |
| "epoch 003: 100% 2148/2151 [01:37<00:00, 23.22it/s, loss=6.25, nll_loss=5.059, ppl=33.35, wps=159956, ups=22.2, wpb=7205.9, bsz=191.4, num_updates=6400, lr=0.5, gnorm=0.176, clip=100, loss_scale=1, train_wall=4, wall=292]2020-11-30 12:29:10 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 003 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 003 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 18.81it/s]\u001b[A\n", | |
| "epoch 003 | valid on 'valid' subset: 89% 8/9 [00:00<00:00, 23.51it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:29:11 | INFO | valid | epoch 003 | valid on 'valid' subset | loss 6.02 | nll_loss 4.806 | ppl 27.97 | wps 208609 | wpb 3714.9 | bsz 129.6 | num_updates 6446 | best_loss 6.02\n", | |
| "2020-11-30 12:29:11 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:29:11 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint3.pt (epoch 3 @ 6446 updates, score 6.02) (writing took 0.45804810099980386 seconds)\n", | |
| "2020-11-30 12:29:11 | INFO | fairseq_cli.train | end of epoch 3 (average epoch stats below)\n", | |
| "2020-11-30 12:29:11 | INFO | train | epoch 003 | loss 6.336 | nll_loss 5.159 | ppl 35.72 | wps 154511 | ups 21.73 | wpb 7110.7 | bsz 204.7 | num_updates 6446 | lr 0.5 | gnorm 0.191 | clip 100 | loss_scale 1 | train_wall 96 | wall 295\n", | |
| "epoch 004: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:29:11 | INFO | fairseq.trainer | begin training epoch 4\n", | |
| "epoch 004: 100% 2149/2151 [01:38<00:00, 24.14it/s, loss=5.968, nll_loss=4.732, ppl=26.58, wps=156925, ups=21.44, wpb=7320.3, bsz=226.1, num_updates=8500, lr=0.5, gnorm=0.167, clip=100, loss_scale=1, train_wall=5, wall=389]2020-11-30 12:30:50 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 004 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 004 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 19.06it/s]\u001b[A\n", | |
| "epoch 004 | valid on 'valid' subset: 89% 8/9 [00:00<00:00, 23.33it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:30:50 | INFO | valid | epoch 004 | valid on 'valid' subset | loss 5.822 | nll_loss 4.584 | ppl 23.99 | wps 201028 | wpb 3714.9 | bsz 129.6 | num_updates 8597 | best_loss 5.822\n", | |
| "2020-11-30 12:30:50 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:30:50 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint4.pt (epoch 4 @ 8597 updates, score 5.822) (writing took 0.4298879289999604 seconds)\n", | |
| "2020-11-30 12:30:50 | INFO | fairseq_cli.train | end of epoch 4 (average epoch stats below)\n", | |
| "2020-11-30 12:30:50 | INFO | train | epoch 004 | loss 6.081 | nll_loss 4.861 | ppl 29.06 | wps 154093 | ups 21.67 | wpb 7110.7 | bsz 204.7 | num_updates 8597 | lr 0.5 | gnorm 0.18 | clip 100 | loss_scale 1 | train_wall 96 | wall 394\n", | |
| "epoch 005: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:30:51 | INFO | fairseq.trainer | begin training epoch 5\n", | |
| "epoch 005: 100% 2150/2151 [01:37<00:00, 24.17it/s, loss=5.964, nll_loss=4.726, ppl=26.47, wps=155778, ups=21.9, wpb=7112.8, bsz=180.8, num_updates=10700, lr=0.5, gnorm=0.166, clip=100, loss_scale=1, train_wall=4, wall=490]2020-11-30 12:32:28 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 005 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 005 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 19.11it/s]\u001b[A\n", | |
| "epoch 005 | valid on 'valid' subset: 89% 8/9 [00:00<00:00, 23.74it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:32:29 | INFO | valid | epoch 005 | valid on 'valid' subset | loss 5.702 | nll_loss 4.436 | ppl 21.65 | wps 207680 | wpb 3714.9 | bsz 129.6 | num_updates 10748 | best_loss 5.702\n", | |
| "2020-11-30 12:32:29 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:32:29 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint5.pt (epoch 5 @ 10748 updates, score 5.702) (writing took 0.4352496500000598 seconds)\n", | |
| "2020-11-30 12:32:29 | INFO | fairseq_cli.train | end of epoch 5 (average epoch stats below)\n", | |
| "2020-11-30 12:32:29 | INFO | train | epoch 005 | loss 5.923 | nll_loss 4.678 | ppl 25.6 | wps 154968 | ups 21.79 | wpb 7110.7 | bsz 204.7 | num_updates 10748 | lr 0.5 | gnorm 0.172 | clip 100 | loss_scale 1 | train_wall 96 | wall 493\n", | |
| "epoch 006: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:32:29 | INFO | fairseq.trainer | begin training epoch 6\n", | |
| "epoch 006: 100% 2150/2151 [01:38<00:00, 23.86it/s, loss=5.801, nll_loss=4.539, ppl=23.25, wps=155459, ups=21.63, wpb=7188.1, bsz=200.7, num_updates=12800, lr=0.5, gnorm=0.164, clip=100, loss_scale=1, train_wall=5, wall=587]2020-11-30 12:34:08 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 006 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 006 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 18.72it/s]\u001b[A\n", | |
| "epoch 006 | valid on 'valid' subset: 78% 7/9 [00:00<00:00, 22.78it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:34:08 | INFO | valid | epoch 006 | valid on 'valid' subset | loss 5.612 | nll_loss 4.317 | ppl 19.93 | wps 192864 | wpb 3714.9 | bsz 129.6 | num_updates 12899 | best_loss 5.612\n", | |
| "2020-11-30 12:34:08 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:34:08 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint6.pt (epoch 6 @ 12899 updates, score 5.612) (writing took 0.43062741699986873 seconds)\n", | |
| "2020-11-30 12:34:08 | INFO | fairseq_cli.train | end of epoch 6 (average epoch stats below)\n", | |
| "2020-11-30 12:34:08 | INFO | train | epoch 006 | loss 5.811 | nll_loss 4.549 | ppl 23.4 | wps 154272 | ups 21.7 | wpb 7110.7 | bsz 204.7 | num_updates 12899 | lr 0.5 | gnorm 0.166 | clip 100 | loss_scale 1 | train_wall 96 | wall 592\n", | |
| "epoch 007: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:34:08 | INFO | fairseq.trainer | begin training epoch 7\n", | |
| "epoch 007: 100% 2148/2151 [01:37<00:00, 23.69it/s, loss=5.685, nll_loss=4.406, ppl=21.21, wps=159422, ups=21.91, wpb=7275.7, bsz=236.2, num_updates=15000, lr=0.5, gnorm=0.164, clip=100, loss_scale=1, train_wall=4, wall=688]2020-11-30 12:35:46 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 007 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 007 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 18.75it/s]\u001b[A\n", | |
| "epoch 007 | valid on 'valid' subset: 89% 8/9 [00:00<00:00, 23.61it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:35:46 | INFO | valid | epoch 007 | valid on 'valid' subset | loss 5.547 | nll_loss 4.264 | ppl 19.21 | wps 198343 | wpb 3714.9 | bsz 129.6 | num_updates 15050 | best_loss 5.547\n", | |
| "2020-11-30 12:35:46 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:35:47 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint7.pt (epoch 7 @ 15050 updates, score 5.547) (writing took 0.44581306800000675 seconds)\n", | |
| "2020-11-30 12:35:47 | INFO | fairseq_cli.train | end of epoch 7 (average epoch stats below)\n", | |
| "2020-11-30 12:35:47 | INFO | train | epoch 007 | loss 5.728 | nll_loss 4.453 | ppl 21.9 | wps 155211 | ups 21.83 | wpb 7110.7 | bsz 204.7 | num_updates 15050 | lr 0.5 | gnorm 0.16 | clip 100 | loss_scale 1 | train_wall 96 | wall 691\n", | |
| "epoch 008: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:35:47 | INFO | fairseq.trainer | begin training epoch 8\n", | |
| "epoch 008: 100% 2148/2151 [01:38<00:00, 23.18it/s, loss=5.622, nll_loss=4.33, ppl=20.12, wps=160540, ups=21.84, wpb=7349.6, bsz=213.1, num_updates=17100, lr=0.5, gnorm=0.149, clip=100, loss_scale=2, train_wall=4, wall=785]2020-11-30 12:37:25 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 008 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 008 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 19.16it/s]\u001b[A\n", | |
| "epoch 008 | valid on 'valid' subset: 89% 8/9 [00:00<00:00, 23.97it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:37:26 | INFO | valid | epoch 008 | valid on 'valid' subset | loss 5.484 | nll_loss 4.171 | ppl 18.01 | wps 218928 | wpb 3714.9 | bsz 129.6 | num_updates 17201 | best_loss 5.484\n", | |
| "2020-11-30 12:37:26 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:37:26 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint8.pt (epoch 8 @ 17201 updates, score 5.484) (writing took 0.41688709699974424 seconds)\n", | |
| "2020-11-30 12:37:26 | INFO | fairseq_cli.train | end of epoch 8 (average epoch stats below)\n", | |
| "2020-11-30 12:37:26 | INFO | train | epoch 008 | loss 5.662 | nll_loss 4.377 | ppl 20.78 | wps 154076 | ups 21.67 | wpb 7110.7 | bsz 204.7 | num_updates 17201 | lr 0.5 | gnorm 0.156 | clip 100 | loss_scale 2 | train_wall 96 | wall 790\n", | |
| "epoch 009: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:37:26 | INFO | fairseq.trainer | begin training epoch 9\n", | |
| "epoch 009: 100% 2150/2151 [01:38<00:00, 23.43it/s, loss=5.576, nll_loss=4.278, ppl=19.41, wps=157247, ups=21.6, wpb=7279.1, bsz=210.8, num_updates=19300, lr=0.5, gnorm=0.14, clip=100, loss_scale=2, train_wall=5, wall=886]2020-11-30 12:39:04 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 009 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 009 | valid on 'valid' subset: 11% 1/9 [00:00<00:00, 9.75it/s]\u001b[A\n", | |
| "epoch 009 | valid on 'valid' subset: 78% 7/9 [00:00<00:00, 13.01it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:39:05 | INFO | valid | epoch 009 | valid on 'valid' subset | loss 5.432 | nll_loss 4.125 | ppl 17.45 | wps 242535 | wpb 3714.9 | bsz 129.6 | num_updates 19352 | best_loss 5.432\n", | |
| "2020-11-30 12:39:05 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:39:05 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint9.pt (epoch 9 @ 19352 updates, score 5.432) (writing took 0.43970147699974405 seconds)\n", | |
| "2020-11-30 12:39:05 | INFO | fairseq_cli.train | end of epoch 9 (average epoch stats below)\n", | |
| "2020-11-30 12:39:05 | INFO | train | epoch 009 | loss 5.61 | nll_loss 4.316 | ppl 19.92 | wps 154533 | ups 21.73 | wpb 7110.7 | bsz 204.7 | num_updates 19352 | lr 0.5 | gnorm 0.152 | clip 100 | loss_scale 2 | train_wall 96 | wall 889\n", | |
| "epoch 010: 0% 0/2151 [00:00<?, ?it/s]2020-11-30 12:39:05 | INFO | fairseq.trainer | begin training epoch 10\n", | |
| "epoch 010: 100% 2149/2151 [01:38<00:00, 23.09it/s, loss=5.582, nll_loss=4.284, ppl=19.48, wps=149475, ups=21.46, wpb=6966.1, bsz=195, num_updates=21500, lr=0.5, gnorm=0.157, clip=100, loss_scale=2, train_wall=5, wall=988] 2020-11-30 12:40:44 | INFO | fairseq_cli.train | begin validation on \"valid\" subset\n", | |
| "\n", | |
| "epoch 010 | valid on 'valid' subset: 0% 0/9 [00:00<?, ?it/s]\u001b[A\n", | |
| "epoch 010 | valid on 'valid' subset: 22% 2/9 [00:00<00:00, 19.15it/s]\u001b[A\n", | |
| "epoch 010 | valid on 'valid' subset: 89% 8/9 [00:00<00:00, 23.82it/s]\u001b[A\n", | |
| " \u001b[A2020-11-30 12:40:44 | INFO | valid | epoch 010 | valid on 'valid' subset | loss 5.397 | nll_loss 4.068 | ppl 16.77 | wps 207006 | wpb 3714.9 | bsz 129.6 | num_updates 21503 | best_loss 5.397\n", | |
| "2020-11-30 12:40:44 | INFO | fairseq_cli.train | begin save checkpoint\n", | |
| "2020-11-30 12:40:45 | INFO | fairseq.checkpoint_utils | saved checkpoint save/checkpoint10.pt (epoch 10 @ 21503 updates, score 5.397) (writing took 0.47614908100013054 seconds)\n", | |
| "2020-11-30 12:40:45 | INFO | fairseq_cli.train | end of epoch 10 (average epoch stats below)\n", | |
| "2020-11-30 12:40:45 | INFO | train | epoch 010 | loss 5.565 | nll_loss 4.264 | ppl 19.22 | wps 153577 | ups 21.6 | wpb 7110.7 | bsz 204.7 | num_updates 21503 | lr 0.5 | gnorm 0.148 | clip 100 | loss_scale 2 | train_wall 96 | wall 989\n", | |
| "2020-11-30 12:40:45 | INFO | fairseq_cli.train | done training in 988.2 seconds\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "BqNmkip4EyEM", | |
| "outputId": "72f82d0a-63c8-4b85-debd-8422e9cda540" | |
| }, | |
| "source": [ | |
| " ! echo '無理.' | python spm.py | fairseq-interactive data --path save/checkpoint10.pt --lenpen 0.6" | |
| ], | |
| "execution_count": 62, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "2020-11-30 13:00:59 | INFO | fairseq_cli.interactive | Namespace(all_gather_list_size=16384, batch_size=1, batch_size_valid=None, beam=5, bf16=False, bpe=None, broadcast_buffers=False, bucket_cap_mb=25, buffer_size=1, checkpoint_shard_count=1, checkpoint_suffix='', constraints=None, cpu=False, criterion='cross_entropy', curriculum=0, data='data', data_buffer_size=10, dataset_impl=None, ddp_backend='c10d', decoding_format=None, device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_num_procs=1, distributed_port=-1, distributed_rank=0, distributed_world_size=1, distributed_wrapper='DDP', diverse_beam_groups=-1, diverse_beam_strength=0.5, diversity_rate=-1.0, empty_cache_freq=0, eval_bleu=False, eval_bleu_args=None, eval_bleu_detok='space', eval_bleu_detok_args=None, eval_bleu_print_samples=False, eval_bleu_remove_bpe=None, eval_tokenized_bleu=False, fast_stat_sync=False, find_unused_parameters=False, fix_batches_to_gpus=False, fixed_validation_seed=None, force_anneal=None, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, gen_subset='test', input='-', iter_decode_eos_penalty=0.0, iter_decode_force_max_iter=False, iter_decode_max_iter=10, iter_decode_with_beam=1, iter_decode_with_external_reranker=False, left_pad_source='True', left_pad_target='False', lenpen=0.6, lm_path=None, lm_weight=0.0, load_alignments=False, localsgd_frequency=3, log_format=None, log_interval=100, lr_scheduler='fixed', lr_shrink=0.1, match_source_len=False, max_len_a=0, max_len_b=200, max_source_positions=1024, max_target_positions=1024, max_tokens=None, max_tokens_valid=None, memory_efficient_bf16=False, memory_efficient_fp16=False, min_len=1, min_loss_scale=0.0001, model_overrides='{}', model_parallel_size=1, nbest=1, no_beamable_mm=False, no_early_stop=False, no_progress_bar=False, no_repeat_ngram_size=0, no_seed_provided=False, nprocs_per_node=1, 
num_batch_buckets=0, num_shards=1, num_workers=1, optimizer=None, path='save/checkpoint10.pt', pipeline_balance=None, pipeline_checkpoint='never', pipeline_chunks=0, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_devices=None, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_model_parallel=False, prefix_size=0, print_alignment=False, print_step=False, profile=False, quantization_config_path=None, quiet=False, remove_bpe=None, replace_unk=None, required_batch_size_multiple=8, required_seq_len_multiple=1, results_path=None, retain_dropout=False, retain_dropout_modules=None, retain_iter_history=False, sacrebleu=False, sampling=False, sampling_topk=-1, sampling_topp=-1.0, score_reference=False, scoring='bleu', seed=1, shard_id=0, skip_invalid_size_inputs_valid_test=False, slowmo_algorithm='LocalSGD', slowmo_momentum=None, source_lang=None, target_lang=None, task='translation', temperature=1.0, tensorboard_logdir=None, threshold_loss_scale=None, tokenizer=None, tpu=False, train_subset='train', truncate_source=False, unkpen=0, unnormalized=False, upsample_primary=1, user_dir=None, valid_subset='valid', validate_after_updates=0, validate_interval=1, validate_interval_updates=0, warmup_updates=0, zero_sharding='none')\n", | |
| "2020-11-30 13:00:59 | INFO | fairseq.tasks.translation | [ja] dictionary: 8000 types\n", | |
| "2020-11-30 13:00:59 | INFO | fairseq.tasks.translation | [en] dictionary: 8000 types\n", | |
| "2020-11-30 13:00:59 | INFO | fairseq_cli.interactive | loading model(s) from save/checkpoint10.pt\n", | |
| "2020-11-30 13:01:04 | INFO | fairseq_cli.interactive | NOTE: hypothesis and token scores are output in base 2\n", | |
| "2020-11-30 13:01:04 | INFO | fairseq_cli.interactive | Type the input sentence and press return:\n", | |
| "S-0\t▁ 無 理 .\n", | |
| "W-0\t0.032\tseconds\n", | |
| "H-0\t-4.630398750305176\t▁M r .\n", | |
| "D-0\t-4.630398750305176\t▁M r .\n", | |
| "P-0\t-3.1007 -4.3704 -0.4348 -2.7320\n", | |
| "2020-11-30 13:01:04 | INFO | fairseq_cli.interactive | Total time: 4.443 seconds; translation time: 0.032\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "EpqCNZw8N1Dd", | |
| "outputId": "8b2db03a-f6f9-4336-e221-fe206fa4c18f" | |
| }, | |
| "source": [ | |
| " ! echo '私は日本人です.' | python spm.py | fairseq-interactive data --path save/checkpoint10.pt --lenpen 0.6" | |
| ], | |
| "execution_count": 61, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "2020-11-30 13:00:52 | INFO | fairseq_cli.interactive | Namespace(all_gather_list_size=16384, batch_size=1, batch_size_valid=None, beam=5, bf16=False, bpe=None, broadcast_buffers=False, bucket_cap_mb=25, buffer_size=1, checkpoint_shard_count=1, checkpoint_suffix='', constraints=None, cpu=False, criterion='cross_entropy', curriculum=0, data='data', data_buffer_size=10, dataset_impl=None, ddp_backend='c10d', decoding_format=None, device_id=0, disable_validation=False, distributed_backend='nccl', distributed_init_method=None, distributed_no_spawn=False, distributed_num_procs=1, distributed_port=-1, distributed_rank=0, distributed_world_size=1, distributed_wrapper='DDP', diverse_beam_groups=-1, diverse_beam_strength=0.5, diversity_rate=-1.0, empty_cache_freq=0, eval_bleu=False, eval_bleu_args=None, eval_bleu_detok='space', eval_bleu_detok_args=None, eval_bleu_print_samples=False, eval_bleu_remove_bpe=None, eval_tokenized_bleu=False, fast_stat_sync=False, find_unused_parameters=False, fix_batches_to_gpus=False, fixed_validation_seed=None, force_anneal=None, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, gen_subset='test', input='-', iter_decode_eos_penalty=0.0, iter_decode_force_max_iter=False, iter_decode_max_iter=10, iter_decode_with_beam=1, iter_decode_with_external_reranker=False, left_pad_source='True', left_pad_target='False', lenpen=0.6, lm_path=None, lm_weight=0.0, load_alignments=False, localsgd_frequency=3, log_format=None, log_interval=100, lr_scheduler='fixed', lr_shrink=0.1, match_source_len=False, max_len_a=0, max_len_b=200, max_source_positions=1024, max_target_positions=1024, max_tokens=None, max_tokens_valid=None, memory_efficient_bf16=False, memory_efficient_fp16=False, min_len=1, min_loss_scale=0.0001, model_overrides='{}', model_parallel_size=1, nbest=1, no_beamable_mm=False, no_early_stop=False, no_progress_bar=False, no_repeat_ngram_size=0, no_seed_provided=False, nprocs_per_node=1, 
num_batch_buckets=0, num_shards=1, num_workers=1, optimizer=None, path='save/checkpoint10.pt', pipeline_balance=None, pipeline_checkpoint='never', pipeline_chunks=0, pipeline_decoder_balance=None, pipeline_decoder_devices=None, pipeline_devices=None, pipeline_encoder_balance=None, pipeline_encoder_devices=None, pipeline_model_parallel=False, prefix_size=0, print_alignment=False, print_step=False, profile=False, quantization_config_path=None, quiet=False, remove_bpe=None, replace_unk=None, required_batch_size_multiple=8, required_seq_len_multiple=1, results_path=None, retain_dropout=False, retain_dropout_modules=None, retain_iter_history=False, sacrebleu=False, sampling=False, sampling_topk=-1, sampling_topp=-1.0, score_reference=False, scoring='bleu', seed=1, shard_id=0, skip_invalid_size_inputs_valid_test=False, slowmo_algorithm='LocalSGD', slowmo_momentum=None, source_lang=None, target_lang=None, task='translation', temperature=1.0, tensorboard_logdir=None, threshold_loss_scale=None, tokenizer=None, tpu=False, train_subset='train', truncate_source=False, unkpen=0, unnormalized=False, upsample_primary=1, user_dir=None, valid_subset='valid', validate_after_updates=0, validate_interval=1, validate_interval_updates=0, warmup_updates=0, zero_sharding='none')\n", | |
| "2020-11-30 13:00:52 | INFO | fairseq.tasks.translation | [ja] dictionary: 8000 types\n", | |
| "2020-11-30 13:00:52 | INFO | fairseq.tasks.translation | [en] dictionary: 8000 types\n", | |
| "2020-11-30 13:00:52 | INFO | fairseq_cli.interactive | loading model(s) from save/checkpoint10.pt\n", | |
| "2020-11-30 13:00:57 | INFO | fairseq_cli.interactive | NOTE: hypothesis and token scores are output in base 2\n", | |
| "2020-11-30 13:00:57 | INFO | fairseq_cli.interactive | Type the input sentence and press return:\n", | |
| "S-0\t▁ 私 は日本 人で す .\n", | |
| "W-0\t0.047\tseconds\n", | |
| "H-0\t-4.104242324829102\t▁It ▁is ▁a ▁person ▁in ▁Japan .\n", | |
| "D-0\t-4.104242324829102\t▁It ▁is ▁a ▁person ▁in ▁Japan .\n", | |
| "P-0\t-3.1922 -0.7839 -1.8297 -4.0475 -2.9771 -0.9106 -0.4757 -0.0750\n", | |
| "2020-11-30 13:00:57 | INFO | fairseq_cli.interactive | Total time: 4.495 seconds; translation time: 0.047\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment