Last active
December 5, 2020 03:17
-
-
Save JonathanReeve/25708b17dd172f9d5d6ba6f9d74a4ab7 to your computer and use it in GitHub Desktop.
macro-etym
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "macro-etym", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyPHrTxhX5VPdbUE83otXa7E", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/JonathanReeve/25708b17dd172f9d5d6ba6f9d74a4ab7/macro-etym.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "gJ9MVAdxcM55" | |
}, | |
"source": [ | |
"# Macro-Etymological Textual Analysis\n", | |
"\n", | |
"First, download the Python package from GitHub:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "3sGvhRMaa7Fu", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "17af2588-45d2-4fe6-9443-e0836df63bb4" | |
}, | |
"source": [ | |
"!pip install git+https://github.com/JonathanReeve/macro-etym" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Collecting git+https://github.com/JonathanReeve/macro-etym\n", | |
" Cloning https://github.com/JonathanReeve/macro-etym to /tmp/pip-req-build-ckxwjp8x\n", | |
" Running command git clone -q https://github.com/JonathanReeve/macro-etym /tmp/pip-req-build-ckxwjp8x\n", | |
"Requirement already satisfied (use --upgrade to upgrade): macroetym==0.1.2 from git+https://github.com/JonathanReeve/macro-etym in /usr/local/lib/python3.6/dist-packages\n", | |
"Requirement already satisfied: Click in /usr/local/lib/python3.6/dist-packages (from macroetym==0.1.2) (7.1.2)\n", | |
"Requirement already satisfied: nltk in /usr/local/lib/python3.6/dist-packages (from macroetym==0.1.2) (3.2.5)\n", | |
"Requirement already satisfied: pycountry in /usr/local/lib/python3.6/dist-packages (from macroetym==0.1.2) (20.7.3)\n", | |
"Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from macroetym==0.1.2) (1.1.4)\n", | |
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from macroetym==0.1.2) (3.2.2)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from nltk->macroetym==0.1.2) (1.15.0)\n", | |
"Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->macroetym==0.1.2) (2018.9)\n", | |
"Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.6/dist-packages (from pandas->macroetym==0.1.2) (1.18.5)\n", | |
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->macroetym==0.1.2) (2.8.1)\n", | |
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->macroetym==0.1.2) (0.10.0)\n", | |
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->macroetym==0.1.2) (1.3.1)\n", | |
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->macroetym==0.1.2) (2.4.7)\n", | |
"Building wheels for collected packages: macroetym\n", | |
" Building wheel for macroetym (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for macroetym: filename=macroetym-0.1.2-cp36-none-any.whl size=11334095 sha256=c9db7c876d0a455cd10ff21fa5ea42c34468b953616d93a8a764c0794c7a74ba\n", | |
" Stored in directory: /tmp/pip-ephem-wheel-cache-ns5t03av/wheels/8e/23/b0/7a72b270081d27d01fe9abd0be7b77bb399d2020d6ef88d129\n", | |
"Successfully built macroetym\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "UiWGCGpScW0B" | |
}, | |
"source": [ | |
"Import all the required libraries: " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "IwtyXcqsOXLc" | |
}, | |
"source": [ | |
"import macroetym\n", | |
"import requests\n", | |
"import json\n", | |
"import nltk\n", | |
"from macroetym.main import Text" | |
], | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "kyJQ0pR2cabR" | |
}, | |
"source": [ | |
"Download some NLTK data: " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "tSmQVJqqQ_MU", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "e1363bd4-94e0-4d73-fe7b-fef3417242ff" | |
}, | |
"source": [ | |
"nltk.download('book')" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[nltk_data] Downloading collection 'book'\n", | |
"[nltk_data] | \n", | |
"[nltk_data] | Downloading package abc to /root/nltk_data...\n", | |
"[nltk_data] | Package abc is already up-to-date!\n", | |
"[nltk_data] | Downloading package brown to /root/nltk_data...\n", | |
"[nltk_data] | Package brown is already up-to-date!\n", | |
"[nltk_data] | Downloading package chat80 to /root/nltk_data...\n", | |
"[nltk_data] | Package chat80 is already up-to-date!\n", | |
"[nltk_data] | Downloading package cmudict to /root/nltk_data...\n", | |
"[nltk_data] | Package cmudict is already up-to-date!\n", | |
"[nltk_data] | Downloading package conll2000 to /root/nltk_data...\n", | |
"[nltk_data] | Package conll2000 is already up-to-date!\n", | |
"[nltk_data] | Downloading package conll2002 to /root/nltk_data...\n", | |
"[nltk_data] | Package conll2002 is already up-to-date!\n", | |
"[nltk_data] | Downloading package dependency_treebank to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package dependency_treebank is already up-to-date!\n", | |
"[nltk_data] | Downloading package genesis to /root/nltk_data...\n", | |
"[nltk_data] | Package genesis is already up-to-date!\n", | |
"[nltk_data] | Downloading package gutenberg to /root/nltk_data...\n", | |
"[nltk_data] | Package gutenberg is already up-to-date!\n", | |
"[nltk_data] | Downloading package ieer to /root/nltk_data...\n", | |
"[nltk_data] | Package ieer is already up-to-date!\n", | |
"[nltk_data] | Downloading package inaugural to /root/nltk_data...\n", | |
"[nltk_data] | Package inaugural is already up-to-date!\n", | |
"[nltk_data] | Downloading package movie_reviews to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package movie_reviews is already up-to-date!\n", | |
"[nltk_data] | Downloading package nps_chat to /root/nltk_data...\n", | |
"[nltk_data] | Package nps_chat is already up-to-date!\n", | |
"[nltk_data] | Downloading package names to /root/nltk_data...\n", | |
"[nltk_data] | Package names is already up-to-date!\n", | |
"[nltk_data] | Downloading package ppattach to /root/nltk_data...\n", | |
"[nltk_data] | Package ppattach is already up-to-date!\n", | |
"[nltk_data] | Downloading package reuters to /root/nltk_data...\n", | |
"[nltk_data] | Package reuters is already up-to-date!\n", | |
"[nltk_data] | Downloading package senseval to /root/nltk_data...\n", | |
"[nltk_data] | Package senseval is already up-to-date!\n", | |
"[nltk_data] | Downloading package state_union to /root/nltk_data...\n", | |
"[nltk_data] | Package state_union is already up-to-date!\n", | |
"[nltk_data] | Downloading package stopwords to /root/nltk_data...\n", | |
"[nltk_data] | Package stopwords is already up-to-date!\n", | |
"[nltk_data] | Downloading package swadesh to /root/nltk_data...\n", | |
"[nltk_data] | Package swadesh is already up-to-date!\n", | |
"[nltk_data] | Downloading package timit to /root/nltk_data...\n", | |
"[nltk_data] | Package timit is already up-to-date!\n", | |
"[nltk_data] | Downloading package treebank to /root/nltk_data...\n", | |
"[nltk_data] | Package treebank is already up-to-date!\n", | |
"[nltk_data] | Downloading package toolbox to /root/nltk_data...\n", | |
"[nltk_data] | Package toolbox is already up-to-date!\n", | |
"[nltk_data] | Downloading package udhr to /root/nltk_data...\n", | |
"[nltk_data] | Package udhr is already up-to-date!\n", | |
"[nltk_data] | Downloading package udhr2 to /root/nltk_data...\n", | |
"[nltk_data] | Package udhr2 is already up-to-date!\n", | |
"[nltk_data] | Downloading package unicode_samples to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package unicode_samples is already up-to-date!\n", | |
"[nltk_data] | Downloading package webtext to /root/nltk_data...\n", | |
"[nltk_data] | Package webtext is already up-to-date!\n", | |
"[nltk_data] | Downloading package wordnet to /root/nltk_data...\n", | |
"[nltk_data] | Package wordnet is already up-to-date!\n", | |
"[nltk_data] | Downloading package wordnet_ic to /root/nltk_data...\n", | |
"[nltk_data] | Package wordnet_ic is already up-to-date!\n", | |
"[nltk_data] | Downloading package words to /root/nltk_data...\n", | |
"[nltk_data] | Package words is already up-to-date!\n", | |
"[nltk_data] | Downloading package maxent_treebank_pos_tagger to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package maxent_treebank_pos_tagger is already up-\n", | |
"[nltk_data] | to-date!\n", | |
"[nltk_data] | Downloading package maxent_ne_chunker to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package maxent_ne_chunker is already up-to-date!\n", | |
"[nltk_data] | Downloading package universal_tagset to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package universal_tagset is already up-to-date!\n", | |
"[nltk_data] | Downloading package punkt to /root/nltk_data...\n", | |
"[nltk_data] | Package punkt is already up-to-date!\n", | |
"[nltk_data] | Downloading package book_grammars to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package book_grammars is already up-to-date!\n", | |
"[nltk_data] | Downloading package city_database to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package city_database is already up-to-date!\n", | |
"[nltk_data] | Downloading package tagsets to /root/nltk_data...\n", | |
"[nltk_data] | Package tagsets is already up-to-date!\n", | |
"[nltk_data] | Downloading package panlex_swadesh to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package panlex_swadesh is already up-to-date!\n", | |
"[nltk_data] | Downloading package averaged_perceptron_tagger to\n", | |
"[nltk_data] | /root/nltk_data...\n", | |
"[nltk_data] | Package averaged_perceptron_tagger is already up-\n", | |
"[nltk_data] | to-date!\n", | |
"[nltk_data] | \n", | |
"[nltk_data] Done downloading collection book\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "-UXEwaicciSX" | |
}, | |
"source": [ | |
"Grab a test text: " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "iin_XbwIQUyR" | |
}, | |
"source": [ | |
"# Download the full text as JSON. Stop on a hung connection or an HTTP error\n", | |
"# here, rather than failing below with an unrelated JSON/index error.\n", | |
"reply = requests.get('http://corpus-db.org/api/id/10.0/fulltext', timeout=60)\n", | |
"reply.raise_for_status()\n", | |
"response = reply.text\n", | |
"# The decoded payload is indexed as a list; its first item carries the 'text' field.\n", | |
"bible = json.loads(response)[0]['text']" | |
], | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZLDcQKgIQzIh", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 52 | |
}, | |
"outputId": "b24a5fba-5acb-40d4-b79d-be03b4212a26" | |
}, | |
"source": [ | |
"bible[:300]" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "string" | |
}, | |
"text/plain": [ | |
"'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nThe Old Testament of the King James Version of the Bible\\n\\n\\n\\n\\nThe First Book of Moses: Called Genesis\\n\\n\\n1:1 In the beginning God created the heaven and the earth.\\n\\n1:2 And the earth was without form, and void; and darkness was upon\\nthe face of the deep. And the Spirit of God moved upon'" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Nr8hC6G2cmE1" | |
}, | |
"source": [ | |
"Load it into a `Text()` object: " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9orcO89GQO0r" | |
}, | |
"source": [ | |
"bibleText = Text(bible)" | |
], | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "0A-nhyiCcpsu" | |
}, | |
"source": [ | |
"And print out stats: " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "UJ_5PKmv2yQs", | |
"outputId": "a5b179f0-bc82-426e-911f-481ba321c876" | |
}, | |
"source": [ | |
"bibleText.stats" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'ang': 42.50495206402029,\n", | |
" 'ara': 0.10696458283812696,\n", | |
" 'arg': 0.02376990729736155,\n", | |
" 'arz': 0.02376990729736155,\n", | |
" 'bod': 0.02376990729736155,\n", | |
" 'cmn': 0.02376990729736155,\n", | |
" 'cym': 0.0475398145947231,\n", | |
" 'deu': 0.011884953648680774,\n", | |
" 'dum': 0.0950796291894462,\n", | |
" 'enm': 0.02376990729736155,\n", | |
" 'fas': 0.007923302432453849,\n", | |
" 'fra': 4.056334680294748,\n", | |
" 'frm': 2.914586799778148,\n", | |
" 'fro': 25.964265906029627,\n", | |
" 'gae': 0.02376990729736155,\n", | |
" 'gle': 0.0475398145947231,\n", | |
" 'gml': 0.03763568655415578,\n", | |
" 'grc': 0.5744394263529041,\n", | |
" 'haw': 0.07130972189208463,\n", | |
" 'heb': 0.23373742175738857,\n", | |
" 'hin': 0.07130972189208463,\n", | |
" 'hye': 0.011884953648680774,\n", | |
" 'ita': 0.05942476824340386,\n", | |
" 'jpn': 0.02376990729736155,\n", | |
" 'kor': 0.02376990729736155,\n", | |
" 'lat': 11.88693447428889,\n", | |
" 'mri': 0.007923302432453849,\n", | |
" 'msa': 0.02376990729736155,\n", | |
" 'nld': 0.14261944378416927,\n", | |
" 'non': 2.5382299342365906,\n", | |
" 'odt': 0.02376990729736155,\n", | |
" 'por': 0.07725219871642502,\n", | |
" 'rus': 0.0475398145947231,\n", | |
" 'sco': 0.02376990729736155,\n", | |
" 'spa': 0.07606370335155693,\n", | |
" 'tur': 0.0950796291894462,\n", | |
" 'xno': 8.034228666508202,\n", | |
" 'yid': 0.015846604864907698}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "hw3kx-yTQ9Hj", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "731005f3-564a-44c4-cd2f-be4e7e73d755" | |
}, | |
"source": [ | |
"bibleText.prettyStats" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'Ancient Greek (to 1453)': 0.57,\n", | |
" 'Anglo-Norman': 8.03,\n", | |
" 'Arabic': 0.11,\n", | |
" 'Aragonese': 0.02,\n", | |
" 'Armenian': 0.01,\n", | |
" 'Dutch': 0.14,\n", | |
" 'Egyptian Arabic': 0.02,\n", | |
" 'French': 4.06,\n", | |
" 'German': 0.01,\n", | |
" 'Guarequena': 0.02,\n", | |
" 'Hawaiian': 0.07,\n", | |
" 'Hebrew': 0.23,\n", | |
" 'Hindi': 0.07,\n", | |
" 'Irish': 0.05,\n", | |
" 'Italian': 0.06,\n", | |
" 'Japanese': 0.02,\n", | |
" 'Korean': 0.02,\n", | |
" 'Latin': 11.89,\n", | |
" 'Malay (macrolanguage)': 0.02,\n", | |
" 'Mandarin Chinese': 0.02,\n", | |
" 'Maori': 0.01,\n", | |
" 'Middle Dutch (ca. 1050-1350)': 0.1,\n", | |
" 'Middle English (1100-1500)': 0.02,\n", | |
" 'Middle French (ca. 1400-1600)': 2.91,\n", | |
" 'Middle Low German': 0.04,\n", | |
" 'Old Dutch': 0.02,\n", | |
" 'Old English (ca. 450-1100)': 42.5,\n", | |
" 'Old French (842-ca. 1400)': 25.96,\n", | |
" 'Old Norse': 2.54,\n", | |
" 'Persian': 0.01,\n", | |
" 'Portuguese': 0.08,\n", | |
" 'Russian': 0.05,\n", | |
" 'Scots': 0.02,\n", | |
" 'Spanish': 0.08,\n", | |
" 'Tibetan': 0.02,\n", | |
" 'Turkish': 0.1,\n", | |
" 'Welsh': 0.05,\n", | |
" 'Yiddish': 0.02}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Se5q6lmrRRHb", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "f05e11c6-71f0-41e4-af1e-7b25980859ab" | |
}, | |
"source": [ | |
"bibleText.familyStats()" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{'Austronesian': 0.007923302432453849,\n", | |
" 'Balto-Slavic': 0.0475398145947231,\n", | |
" 'Celtic': 0.0475398145947231,\n", | |
" 'Germanic': 45.393788130892965,\n", | |
" 'Hellenic': 0.5744394263529041,\n", | |
" 'Indo-Iranian': 0.07923302432453848,\n", | |
" 'Japonic': 0.02376990729736155,\n", | |
" 'Latinate': 53.069091197210994,\n", | |
" 'Other': 0.32089374851438096,\n", | |
" 'Semitic': 0.34070200459551553,\n", | |
" 'Turkic': 0.0950796291894462,\n", | |
" 'Uralic': 0.0}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-TeIohzva0Zz", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "9a914412-fb52-43cb-cee6-ab0b5d004c33" | |
}, | |
"source": [ | |
"bibleText" | |
], | |
"execution_count": 11, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<macroetym.main.Text at 0x7f8146371128>" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "QFFZWlBs068l", | |
"outputId": "b50a3e91-d21f-425b-9eff-089487c113ed" | |
}, | |
"source": [ | |
"word = bibleText.wordObjects[100]\n", | |
"print(word)" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"husbandry\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "oTss9rUo1Nwp", | |
"outputId": "38429874-89b9-4a48-b1ee-0580f3418a58" | |
}, | |
"source": [ | |
"word.parentLanguages" | |
], | |
"execution_count": 33, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 33 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "PkneCVB61UxA", | |
"outputId": "53e7bfa6-251f-4dad-ea56-67cbdeef0640" | |
}, | |
"source": [ | |
"word.parentLanguages.langs # This is the for-real list" | |
], | |
"execution_count": 30, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 30 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "835UgQzq2EfC", | |
"outputId": "12f20a6a-5adc-49e9-bb0b-bfcb615f5304" | |
}, | |
"source": [ | |
"'ang' in word.parentLanguages.langs" | |
], | |
"execution_count": 31, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 31 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "yjBB5j_81fVB", | |
"outputId": "b22e7362-3e9a-4343-9149-34d0c60ec595" | |
}, | |
"source": [ | |
"# Collect every word object whose parent languages include Old French ('fro').\n", | |
"# The membership test inspects each `w` in turn, not the single `word` picked above.\n", | |
"[w for w in bibleText.wordObjects if 'fro' in w.parentLanguages.langs]" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "dTf2lpup1zUY", | |
"outputId": "dfa821d2-abd0-4dd7-9d42-924caab6b51f" | |
}, | |
"source": [ | |
"word.parentLanguages.stats" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{}" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 23 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Va2IFDC32n-v" | |
}, | |
"source": [ | |
"word2 = bibleText.wordObjects[200]" | |
], | |
"execution_count": 24, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "v-Hv09lo3Rl7", | |
"outputId": "f950e529-5ed6-471e-ce5c-026a27cf6a3a" | |
}, | |
"source": [ | |
"word2.parentLanguages" | |
], | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['ang']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 28 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ec33MIWP4Cou" | |
}, | |
"source": [ | |
"word3 = bibleText.wordObjects[300]" | |
], | |
"execution_count": 34, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "_cywa9_c4Q4m", | |
"outputId": "b933918b-4a50-4e5e-c0cf-9eaeb1e5a32f" | |
}, | |
"source": [ | |
"word3" | |
], | |
"execution_count": 35, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"kind (eng)" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 35 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "xDBTjNOo4R95", | |
"outputId": "f75386e7-8d3e-4fc3-b3ad-ba211b454a34" | |
}, | |
"source": [ | |
"word3.parentLanguages" | |
], | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['ang']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 36 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "_3x5Cdy44VNM", | |
"outputId": "990fd2f6-62be-4b2f-fe64-0e62de08ea51" | |
}, | |
"source": [ | |
"# Print the parent languages of words 301-309.\n", | |
"# A separate loop name keeps the `word` inspected in earlier cells from being overwritten.\n", | |
"for w in bibleText.wordObjects[301:310]:\n", | |
"    print(w.parentLanguages)" | |
], | |
"execution_count": 38, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[]\n", | |
"['ang']\n", | |
"['fro']\n", | |
"[]\n", | |
"['fro']\n", | |
"[]\n", | |
"['lat']\n", | |
"[]\n", | |
"[]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "3C3SzO_r4fRB", | |
"outputId": "a4ecaf21-1e4e-4be1-8750-a5bc2165cddf" | |
}, | |
"source": [ | |
"bibleText.wordObjects[303].parentLanguages.langs" | |
], | |
"execution_count": 41, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['fro']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 41 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "eSxFQRWp4nSL", | |
"outputId": "0a9b5092-7dcc-44f4-e77f-a19a9e1a766f" | |
}, | |
"source": [ | |
"'fro' in bibleText.wordObjects[303].parentLanguages.langs" | |
], | |
"execution_count": 42, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 42 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "jHPnZzGe5QvM" | |
}, | |
"source": [ | |
"# Example: listing only the words that derive from a given language" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "R5khuEzl4-ED", | |
"outputId": "fa4c40ad-a32c-4d1c-b30d-6522130f66eb" | |
}, | |
"source": [ | |
"# Using only words 300-350 to save time\n", | |
"for w in bibleText.wordObjects[300:350]:\n", | |
" # I think you have to run this first so that `langs` is populated\n", | |
" # (since it's lazily-loaded). \n", | |
" parentLangs = w.parentLanguages.langs\n", | |
" # If 'Old French' is in the list of parent languages\n", | |
" if 'fro' in parentLangs: \n", | |
" print(w)" | |
], | |
"execution_count": 43, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"oppose\n", | |
"testify\n", | |
"lionlike\n", | |
"cause\n", | |
"authority\n", | |
"square\n", | |
"promise\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "S6n_F7Bb5Pe2" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment