-
-
Save hirosh/30fe99861ba73740e1fa13accdc94dae to your computer and use it in GitHub Desktop.
GPT-2日本語解析.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "GPT-2日本語解析.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyMqR/8s2eHJgB3/9ssmS05D", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/hirosh/30fe99861ba73740e1fa13accdc94dae/gpt-2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "VwuIoM_t7wMT" | |
}, | |
"source": [ | |
"# GPT-2による日本語生成\n", | |
"\n", | |
"使用モデル: [colorfulscoop/gpt2-small-ja](https://huggingface.co/colorfulscoop/gpt2-small-ja)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZJ0CuEnC1glr" | |
}, | |
"source": [ | |
"!pip install transformers==4.3.3 torch==1.8.0 sentencepiece==0.1.91" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "EmCbQjyK1vOT", | |
"outputId": "84881d0f-f58d-46af-cf9f-4d465f7205c1" | |
}, | |
"source": [ | |
"import transformers\n", | |
"\n", | |
"tokenizer = transformers.AutoTokenizer.from_pretrained(\"colorfulscoop/gpt2-small-ja\")\n", | |
"model = transformers.AutoModelForCausalLM.from_pretrained(\"colorfulscoop/gpt2-small-ja\")\n", | |
"\n", | |
"input = tokenizer.encode(\"創庵とは\", return_tensors=\"pt\")\n", | |
"output = model.generate(input, do_sample=True, top_p=0.95, top_k=50, num_return_sequences=3)\n", | |
"print(tokenizer.batch_decode(output))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"['創庵とは、宋時代の徽宗の私室を復興させることを目的として、南宋時代の建築様式', '創庵とは、中国南宋の儒学者、徽宗復斎の弟である紹宗復斎', '創庵とは、江戸時代中期(宝暦時代)に、江戸浅草新伝馬町8番地']\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment