Created
February 28, 2019 11:19
-
-
Save reflash/7ca131600e3a5730b869e5125a43490f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: nltk in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (3.4)\n", | |
"Requirement already satisfied: six in c:\\users\\daniil_ekzarian\\appdata\\roaming\\python\\python37\\site-packages (from nltk) (1.12.0)\n", | |
"Requirement already satisfied: singledispatch in c:\\users\\daniil_ekzarian\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages (from nltk) (3.4.0.3)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"You are using pip version 18.1, however version 19.0.3 is available.\n", | |
"You should consider upgrading via the 'python -m pip install --upgrade pip' command.\n", | |
"[nltk_data] Downloading package comtrans to\n", | |
"[nltk_data] C:\\Users\\Daniil_Ekzarian\\AppData\\Roaming\\nltk_data...\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import sys\n", | |
"!{sys.executable} -m pip install nltk\n", | |
"\n", | |
"import nltk\n", | |
"nltk.download('comtrans')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['klein', 'ist', 'das', 'Haus']" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from nltk.translate import AlignedSent, Alignment\n", | |
"\n", | |
"# Arguments are (words, mots, alignment); each 'i-j' pair in the alignment\n", | |
"# string links words[i] to mots[j], e.g. '0-3' pairs 'klein' with 'small'.\n", | |
"algnsent = AlignedSent(\n", | |
"    ['klein', 'ist', 'das', 'Haus'],\n", | |
"    ['the', 'house', 'is', 'small'],\n", | |
"    Alignment.fromstring('0-3 1-2 2-0 3-1'),\n", | |
")\n", | |
"algnsent.words" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['the', 'house', 'is', 'small']" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"algnsent.mots" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"algnsent.alignment" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>\n", | |
"0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13\n" | |
] | |
} | |
], | |
"source": [ | |
"from nltk.corpus import comtrans\n", | |
"\n", | |
"# Fetch the sentence pair once and reuse it, instead of calling\n", | |
"# comtrans.aligned_sents() a second time just to print the alignment.\n", | |
"aligned_sent = comtrans.aligned_sents()[54]\n", | |
"print(aligned_sent)\n", | |
"print(aligned_sent.alignment)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment