Last active
May 22, 2022 09:56
-
-
Save xflr6/8d5b4d71e6464f21608bafc0675294a9 to your computer and use it in GitHub Desktop.
Check Glottolog -> Wikidata mapping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "fad09ef2-fad6-470a-b17c-e67f5f64ff7c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[INFO@sqlalchemy.engine.Engine] select pg_catalog.version()\n", | |
"[INFO@sqlalchemy.engine.Engine] [raw sql] {}\n", | |
"[INFO@sqlalchemy.engine.Engine] select current_schema()\n", | |
"[INFO@sqlalchemy.engine.Engine] [raw sql] {}\n", | |
"[INFO@sqlalchemy.engine.Engine] show standard_conforming_strings\n", | |
"[INFO@sqlalchemy.engine.Engine] [raw sql] {}\n", | |
"[INFO@sqlalchemy.engine.Engine] SELECT\n", | |
" l.id AS glottocode,\n", | |
" l.name,\n", | |
" ll.level,\n", | |
" ll.category,\n", | |
" substring(wikidata_link->>'url' FROM '/([^/]+)$') AS qid,\n", | |
" substring(wikipedia_link->>'url' FROM '/([^/]+)$') AS title,\n", | |
" i.name AS iso639_3\n", | |
"FROM language AS l\n", | |
"JOIN languoid AS ll USING (pk)\n", | |
"CROSS JOIN jsonb_path_query(l.jsondata::jsonb,\n", | |
" '$.links[*] ? (@.url starts with \"https://www.wikidata.org/entity/\")') AS wikidata_link\n", | |
"LEFT JOIN jsonb_path_query(l.jsondata::jsonb,\n", | |
" '$.links[*] ? (@.url starts with \"https://en.wikipedia.org/wiki/\")') AS wikipedia_link ON TRUE\n", | |
"LEFT JOIN (\n", | |
" languageidentifier AS li\n", | |
" JOIN identifier AS i\n", | |
" ON li.identifier_pk = i.pk AND i.type = 'iso639-3'\n", | |
") ON li.language_pk = l.pk\n", | |
"ORDER BY l.id\n", | |
"[INFO@sqlalchemy.engine.Engine] [raw sql] {}\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Index: 10539 entries, aant1238 to zyph1238\n", | |
"Data columns (total 6 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 name 10539 non-null string\n", | |
" 1 level 10539 non-null string\n", | |
" 2 category 10539 non-null string\n", | |
" 3 qid 10539 non-null string\n", | |
" 4 title 8378 non-null string\n", | |
" 5 iso639_3 7845 non-null string\n", | |
"dtypes: string(6)\n", | |
"memory usage: 4.6 MB\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>level</th>\n", | |
" <th>category</th>\n", | |
" <th>qid</th>\n", | |
" <th>title</th>\n", | |
" <th>iso639_3</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>aant1238</th>\n", | |
" <td>Aantantara</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Dialect</td>\n", | |
" <td>Q31312216</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1238</th>\n", | |
" <td>Aari-Gayil</td>\n", | |
" <td>family</td>\n", | |
" <td>Family</td>\n", | |
" <td>Q85516014</td>\n", | |
" <td><NA></td>\n", | |
" <td>aiz</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1239</th>\n", | |
" <td>Aari</td>\n", | |
" <td>language</td>\n", | |
" <td>Spoken L1 Language</td>\n", | |
" <td>Q7495</td>\n", | |
" <td>Aari_language</td>\n", | |
" <td>aiw</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1240</th>\n", | |
" <td>Aariya</td>\n", | |
" <td>language</td>\n", | |
" <td>Bookkeeping</td>\n", | |
" <td>Q4661732</td>\n", | |
" <td>Aariya_language</td>\n", | |
" <td>aay</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aasa1238</th>\n", | |
" <td>Aasax</td>\n", | |
" <td>language</td>\n", | |
" <td>Spoken L1 Language</td>\n", | |
" <td>Q56620</td>\n", | |
" <td>Asa_language</td>\n", | |
" <td>aas</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name level category qid \\\n", | |
"glottocode \n", | |
"aant1238 Aantantara dialect Dialect Q31312216 \n", | |
"aari1238 Aari-Gayil family Family Q85516014 \n", | |
"aari1239 Aari language Spoken L1 Language Q7495 \n", | |
"aari1240 Aariya language Bookkeeping Q4661732 \n", | |
"aasa1238 Aasax language Spoken L1 Language Q56620 \n", | |
"\n", | |
" title iso639_3 \n", | |
"glottocode \n", | |
"aant1238 <NA> <NA> \n", | |
"aari1238 <NA> aiz \n", | |
"aari1239 Aari_language aiw \n", | |
"aari1240 Aariya_language aay \n", | |
"aasa1238 Asa_language aas " | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import logging\n", | |
"import pathlib\n", | |
"\n", | |
"from IPython.display import display\n", | |
"import pandas as pd\n", | |
"import rdflib\n", | |
"\n", | |
"ENGINE = 'postgresql://postgres@/glottolog3'\n", | |
"\n", | |
"QUERY = '''\n", | |
"SELECT\n", | |
" l.id AS glottocode,\n", | |
" l.name,\n", | |
" ll.level,\n", | |
" ll.category,\n", | |
" substring(wikidata_link->>'url' FROM '/([^/]+)$') AS qid,\n", | |
" substring(wikipedia_link->>'url' FROM '/([^/]+)$') AS title,\n", | |
" i.name AS iso639_3\n", | |
"FROM language AS l\n", | |
"JOIN languoid AS ll USING (pk)\n", | |
"CROSS JOIN jsonb_path_query(l.jsondata::jsonb,\n", | |
" '$.links[*] ? (@.url starts with \"https://www.wikidata.org/entity/\")') AS wikidata_link\n", | |
"LEFT JOIN jsonb_path_query(l.jsondata::jsonb,\n", | |
" '$.links[*] ? (@.url starts with \"https://en.wikipedia.org/wiki/\")') AS wikipedia_link ON TRUE\n", | |
"LEFT JOIN (\n", | |
" languageidentifier AS li\n", | |
" JOIN identifier AS i\n", | |
" ON li.identifier_pk = i.pk AND i.type = 'iso639-3'\n", | |
") ON li.language_pk = l.pk\n", | |
"ORDER BY l.id\n", | |
"'''.strip()\n", | |
"\n", | |
"\n", | |
"logging.basicConfig(format='[%(levelname)s@%(name)s] %(message)s', level=logging.INFO)\n", | |
"logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)\n", | |
"\n", | |
"\n", | |
"gf = pd.read_sql_query(QUERY, ENGINE, index_col='glottocode', dtype='string')\n", | |
"\n", | |
"gf.info(memory_usage='deep')\n", | |
"assert gf.index.is_unique\n", | |
"assert gf.index.is_monotonic_increasing\n", | |
"gf.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "52d240a3-f66b-4e27-9374-902ea159f82c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[INFO@root] endpoint: 'https://query.wikidata.org/sparql'\n", | |
"[INFO@root] graph: [a rdflib:ConjunctiveGraph;rdflib:storage [a rdflib:Store;rdfs:label 'SPARQLStore']]\n", | |
"[INFO@root] prefixes: {'schema': Namespace(\"http://schema.org/\")}\n", | |
"[INFO@root] graph.query('''\n", | |
"SELECT\n", | |
" ?glottocode\n", | |
" (strafter(str(?languoid), str(wd:)) AS ?qid)\n", | |
" (?languoidLabel AS ?name)\n", | |
" (strafter(str(?siteLink), \"https://en.wikipedia.org/wiki/\") AS ?title)\n", | |
"WHERE {\n", | |
" ?languoid wdt:P1394 ?glottocode.\n", | |
" FILTER (REGEX(?glottocode, \"^[a-z0-9]{4}[0-9]{4}$\")).\n", | |
" OPTIONAL {\n", | |
" ?siteLink schema:about ?languoid;\n", | |
" schema:inLanguage \"en\";\n", | |
" schema:isPartOf <https://en.wikipedia.org/>.\n", | |
" }\n", | |
" SERVICE wikibase:label {\n", | |
" bd:serviceParam wikibase:language \"en\".\n", | |
" ?languoid rdfs:label ?languoidLabel.\n", | |
" }\n", | |
"}\n", | |
"ORDER BY\n", | |
" ?glottocode\n", | |
" xsd:integer(strafter(str(?languoid), str(wd:Q)))\n", | |
"OFFSET 0\n", | |
"LIMIT 100000\n", | |
"''')\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Index: 10873 entries, aant1238 to zyph1238\n", | |
"Data columns (total 3 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 qid 10873 non-null string\n", | |
" 1 name 10873 non-null string\n", | |
" 2 title 8682 non-null string\n", | |
"dtypes: string(3)\n", | |
"memory usage: 2.7 MB\n", | |
"CPU times: total: 625 ms\n", | |
"Wall time: 7.05 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>qid</th>\n", | |
" <th>name</th>\n", | |
" <th>title</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>aant1238</th>\n", | |
" <td>Q31312216</td>\n", | |
" <td>Aantantara</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1238</th>\n", | |
" <td>Q85516014</td>\n", | |
" <td>Aari-Gayil</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1239</th>\n", | |
" <td>Q7495</td>\n", | |
" <td>Aari</td>\n", | |
" <td>Aari_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1240</th>\n", | |
" <td>Q4661732</td>\n", | |
" <td>Aariya</td>\n", | |
" <td>Aariya_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aasa1238</th>\n", | |
" <td>Q56620</td>\n", | |
" <td>Asa</td>\n", | |
" <td>Asa_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aata1238</th>\n", | |
" <td>Q31314288</td>\n", | |
" <td>Aatasaara</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>abaa1238</th>\n", | |
" <td>Q31363054</td>\n", | |
" <td>Aba dialect</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>abab1239</th>\n", | |
" <td>Q17379636</td>\n", | |
" <td>Ababda</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>abab1240</th>\n", | |
" <td>Q4931250</td>\n", | |
" <td>Boan</td>\n", | |
" <td>Boan_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>abad1240</th>\n", | |
" <td>Q20644975</td>\n", | |
" <td>Abzakh Adyghe dialect</td>\n", | |
" <td>Abzakh_Adyghe_dialect</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" qid name title\n", | |
"glottocode \n", | |
"aant1238 Q31312216 Aantantara <NA>\n", | |
"aari1238 Q85516014 Aari-Gayil <NA>\n", | |
"aari1239 Q7495 Aari Aari_language\n", | |
"aari1240 Q4661732 Aariya Aariya_language\n", | |
"aasa1238 Q56620 Asa Asa_language\n", | |
"aata1238 Q31314288 Aatasaara <NA>\n", | |
"abaa1238 Q31363054 Aba dialect <NA>\n", | |
"abab1239 Q17379636 Ababda <NA>\n", | |
"abab1240 Q4931250 Boan Boan_languages\n", | |
"abad1240 Q20644975 Abzakh Adyghe dialect Abzakh_Adyghe_dialect" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"\n", | |
"ENDPOINT = 'https://query.wikidata.org/sparql'\n", | |
"\n", | |
"class SCHEMA(rdflib.SDO):\n", | |
" \"\"\"https://github.com/RDFLib/rdflib/issues/1120\"\"\"\n", | |
" _NS = rdflib.Namespace(rdflib.SDO._NS.replace('https://', 'http://'))\n", | |
"\n", | |
"PREFIXES = {'schema': SCHEMA}\n", | |
"\n", | |
"SPARQL_QUERY = '''\n", | |
"SELECT\n", | |
" ?glottocode\n", | |
" (strafter(str(?languoid), str(wd:)) AS ?qid)\n", | |
" (?languoidLabel AS ?name)\n", | |
" (strafter(str(?siteLink), \"https://en.wikipedia.org/wiki/\") AS ?title)\n", | |
"WHERE {\n", | |
" ?languoid wdt:P1394 ?glottocode.\n", | |
" FILTER (REGEX(?glottocode, \"^[a-z0-9]{4}[0-9]{4}$\")).\n", | |
" OPTIONAL {\n", | |
" ?siteLink schema:about ?languoid;\n", | |
" schema:inLanguage \"en\";\n", | |
" schema:isPartOf <https://en.wikipedia.org/>.\n", | |
" }\n", | |
" SERVICE wikibase:label {\n", | |
" bd:serviceParam wikibase:language \"en\".\n", | |
" ?languoid rdfs:label ?languoidLabel.\n", | |
" }\n", | |
"}\n", | |
"ORDER BY\n", | |
" ?glottocode\n", | |
" xsd:integer(strafter(str(?languoid), str(wd:Q)))\n", | |
"'''.strip()\n", | |
"\n", | |
"CSV_PATH = pathlib.Path('wikidata.csv')\n", | |
"\n", | |
"\n", | |
"def open_sparql_graph(endpoint=ENDPOINT, *, prefixes=PREFIXES):\n", | |
" logging.info('endpoint: %r', endpoint)\n", | |
" graph = rdflib.ConjunctiveGraph('SPARQLStore')\n", | |
" graph.open(endpoint)\n", | |
" logging.info('graph: %s', graph)\n", | |
" logging.info('prefixes: %r', prefixes)\n", | |
" for prefix, namespace in prefixes.items():\n", | |
" graph.namespace_manager.bind(prefix, namespace, replace=True)\n", | |
" logging.debug('namespaces: %r', list(graph.namespaces()))\n", | |
" return graph\n", | |
"\n", | |
"\n", | |
"def iterrows(query, *, prefixes=PREFIXES,\n", | |
" limit=None, verbose: bool = False,\n", | |
" per_request: int = 100_000):\n", | |
" if limit is None:\n", | |
" limit = float('inf')\n", | |
"\n", | |
" graph = open_sparql_graph()\n", | |
"\n", | |
" offset = 0\n", | |
" while offset < limit:\n", | |
" request_limit = min(limit - offset, per_request)\n", | |
" request_query = (f'{query}\\n'\n", | |
" f'OFFSET {offset:d}\\n'\n", | |
" f'LIMIT {request_limit:d}')\n", | |
" logging.info(\"graph.query('''\\n%s\\n''')\", request_query)\n", | |
" result = graph.query(request_query)\n", | |
" if not offset:\n", | |
" yield (v.toPython().removeprefix('?') for v in result.vars)\n", | |
" for n, values in enumerate(result, 1):\n", | |
" yield (v.toPython() if v is not None else None for v in values)\n", | |
" if n < request_limit:\n", | |
" return\n", | |
" offset += n\n", | |
"\n", | |
"\n", | |
"def read_sparql_query(query, *, limit=None, **kwargs):\n", | |
" rows = iterrows(query, limit=limit)\n", | |
" columns = list(next(rows))\n", | |
" return pd.DataFrame.from_records(rows, columns=columns, **kwargs)\n", | |
"\n", | |
"\n", | |
"def read_glottocodes(*, path=CSV_PATH, encoding='utf-8'):\n", | |
" if not path.exists():\n", | |
" df = read_sparql_query(SPARQL_QUERY, index='glottocode').astype('string')\n", | |
" df.to_csv(path, encoding=encoding)\n", | |
" return pd.read_csv(path, index_col='glottocode', encoding=encoding).astype('string')\n", | |
"\n", | |
"\n", | |
"wf = read_glottocodes()\n", | |
"\n", | |
"wf.info(memory_usage='deep')\n", | |
"assert wf.index.is_monotonic_increasing\n", | |
"wf.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "bad4f455-99ae-46f2-ab20-af765dc20a4f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>qid</th>\n", | |
" <th>name</th>\n", | |
" <th>title</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>ainu1252</th>\n", | |
" <td>Q27969</td>\n", | |
" <td>Ainu</td>\n", | |
" <td>Ainu_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ainu1252</th>\n", | |
" <td>Q50111972</td>\n", | |
" <td>Ainu</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>andr1246</th>\n", | |
" <td>Q30301408</td>\n", | |
" <td>Andro</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>andr1246</th>\n", | |
" <td>Q55603949</td>\n", | |
" <td>Andro</td>\n", | |
" <td>Andro_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>araf1243</th>\n", | |
" <td>Q4783702</td>\n", | |
" <td>Arafundi</td>\n", | |
" <td>Arafundi_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yulp1239</th>\n", | |
" <td>Q106554801</td>\n", | |
" <td>Yulparirra</td>\n", | |
" <td>Yulparirra_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zeme1240</th>\n", | |
" <td>Q56373</td>\n", | |
" <td>Zeme</td>\n", | |
" <td>Zeme_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zeme1240</th>\n", | |
" <td>Q21491053</td>\n", | |
" <td>Zeme Naga</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zena1250</th>\n", | |
" <td>Q2293952</td>\n", | |
" <td>Zenati</td>\n", | |
" <td>Zenati_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zena1250</th>\n", | |
" <td>Q2741732</td>\n", | |
" <td>Northern Berber</td>\n", | |
" <td>Northern_Berber_languages</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>163 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" qid name title\n", | |
"glottocode \n", | |
"ainu1252 Q27969 Ainu Ainu_languages\n", | |
"ainu1252 Q50111972 Ainu <NA>\n", | |
"andr1246 Q30301408 Andro <NA>\n", | |
"andr1246 Q55603949 Andro Andro_language\n", | |
"araf1243 Q4783702 Arafundi Arafundi_languages\n", | |
"... ... ... ...\n", | |
"yulp1239 Q106554801 Yulparirra Yulparirra_language\n", | |
"zeme1240 Q56373 Zeme Zeme_language\n", | |
"zeme1240 Q21491053 Zeme Naga <NA>\n", | |
"zena1250 Q2293952 Zenati Zenati_languages\n", | |
"zena1250 Q2741732 Northern Berber Northern_Berber_languages\n", | |
"\n", | |
"[163 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"KEEP_ALL = False\n", | |
"\n", | |
"wf[wf.index.duplicated(keep=KEEP_ALL)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "bd95e9b4-7f08-4f71-846b-feba88170dfa", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Index: 10621 entries, aant1238 to zyph1238\n", | |
"Data columns (total 6 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 name 10621 non-null string\n", | |
" 1 level 10621 non-null string\n", | |
" 2 qid 10621 non-null string\n", | |
" 3 title 8415 non-null string\n", | |
" 4 qid_wd 10610 non-null string\n", | |
" 5 title_wd 8482 non-null string\n", | |
"dtypes: string(6)\n", | |
"memory usage: 4.7 MB\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>level</th>\n", | |
" <th>qid</th>\n", | |
" <th>title</th>\n", | |
" <th>qid_wd</th>\n", | |
" <th>title_wd</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>aant1238</th>\n", | |
" <td>Aantantara</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Q31312216</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q31312216</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1238</th>\n", | |
" <td>Aari-Gayil</td>\n", | |
" <td>family</td>\n", | |
" <td>Q85516014</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q85516014</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1239</th>\n", | |
" <td>Aari</td>\n", | |
" <td>language</td>\n", | |
" <td>Q7495</td>\n", | |
" <td>Aari_language</td>\n", | |
" <td>Q7495</td>\n", | |
" <td>Aari_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aari1240</th>\n", | |
" <td>Aariya</td>\n", | |
" <td>language</td>\n", | |
" <td>Q4661732</td>\n", | |
" <td>Aariya_language</td>\n", | |
" <td>Q4661732</td>\n", | |
" <td>Aariya_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aasa1238</th>\n", | |
" <td>Aasax</td>\n", | |
" <td>language</td>\n", | |
" <td>Q56620</td>\n", | |
" <td>Asa_language</td>\n", | |
" <td>Q56620</td>\n", | |
" <td>Asa_language</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name level qid title qid_wd \\\n", | |
"glottocode \n", | |
"aant1238 Aantantara dialect Q31312216 <NA> Q31312216 \n", | |
"aari1238 Aari-Gayil family Q85516014 <NA> Q85516014 \n", | |
"aari1239 Aari language Q7495 Aari_language Q7495 \n", | |
"aari1240 Aariya language Q4661732 Aariya_language Q4661732 \n", | |
"aasa1238 Aasax language Q56620 Asa_language Q56620 \n", | |
"\n", | |
" title_wd \n", | |
"glottocode \n", | |
"aant1238 <NA> \n", | |
"aari1238 <NA> \n", | |
"aari1239 Aari_language \n", | |
"aari1240 Aariya_language \n", | |
"aasa1238 Asa_language " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = gf.join(wf, on='glottocode', rsuffix='_wd').drop(['category', 'name_wd', 'iso639_3'], axis='columns')\n", | |
"\n", | |
"df.info(memory_usage='deep')\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "a5261e90-de74-4b54-8b29-38c69e86ddf4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>level</th>\n", | |
" <th>qid</th>\n", | |
" <th>title</th>\n", | |
" <th>qid_wd</th>\n", | |
" <th>title_wd</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>ainu1252</th>\n", | |
" <td>Ainu</td>\n", | |
" <td>family</td>\n", | |
" <td>Q50111972</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q27969</td>\n", | |
" <td>Ainu_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ainu1252</th>\n", | |
" <td>Ainu</td>\n", | |
" <td>family</td>\n", | |
" <td>Q50111972</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q50111972</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>andr1246</th>\n", | |
" <td>Andro</td>\n", | |
" <td>language</td>\n", | |
" <td>Q30301408</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q30301408</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>andr1246</th>\n", | |
" <td>Andro</td>\n", | |
" <td>language</td>\n", | |
" <td>Q30301408</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q55603949</td>\n", | |
" <td>Andro_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>araf1243</th>\n", | |
" <td>Arafundi</td>\n", | |
" <td>family</td>\n", | |
" <td>Q11170629</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q4783702</td>\n", | |
" <td>Arafundi_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yulp1239</th>\n", | |
" <td>Yulparija</td>\n", | |
" <td>language</td>\n", | |
" <td>Q17319895</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q106554801</td>\n", | |
" <td>Yulparirra_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zeme1240</th>\n", | |
" <td>Zeme Naga</td>\n", | |
" <td>language</td>\n", | |
" <td>Q21491053</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q56373</td>\n", | |
" <td>Zeme_language</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zeme1240</th>\n", | |
" <td>Zeme Naga</td>\n", | |
" <td>language</td>\n", | |
" <td>Q21491053</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q21491053</td>\n", | |
" <td><NA></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zena1250</th>\n", | |
" <td>Zenatic</td>\n", | |
" <td>family</td>\n", | |
" <td>Q2741732</td>\n", | |
" <td>Northern_Berber_languages</td>\n", | |
" <td>Q2293952</td>\n", | |
" <td>Zenati_languages</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zena1250</th>\n", | |
" <td>Zenatic</td>\n", | |
" <td>family</td>\n", | |
" <td>Q2741732</td>\n", | |
" <td>Northern_Berber_languages</td>\n", | |
" <td>Q2741732</td>\n", | |
" <td>Northern_Berber_languages</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>163 rows × 6 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name level qid title \\\n", | |
"glottocode \n", | |
"ainu1252 Ainu family Q50111972 <NA> \n", | |
"ainu1252 Ainu family Q50111972 <NA> \n", | |
"andr1246 Andro language Q30301408 <NA> \n", | |
"andr1246 Andro language Q30301408 <NA> \n", | |
"araf1243 Arafundi family Q11170629 <NA> \n", | |
"... ... ... ... ... \n", | |
"yulp1239 Yulparija language Q17319895 <NA> \n", | |
"zeme1240 Zeme Naga language Q21491053 <NA> \n", | |
"zeme1240 Zeme Naga language Q21491053 <NA> \n", | |
"zena1250 Zenatic family Q2741732 Northern_Berber_languages \n", | |
"zena1250 Zenatic family Q2741732 Northern_Berber_languages \n", | |
"\n", | |
" qid_wd title_wd \n", | |
"glottocode \n", | |
"ainu1252 Q27969 Ainu_languages \n", | |
"ainu1252 Q50111972 <NA> \n", | |
"andr1246 Q30301408 <NA> \n", | |
"andr1246 Q55603949 Andro_language \n", | |
"araf1243 Q4783702 Arafundi_languages \n", | |
"... ... ... \n", | |
"yulp1239 Q106554801 Yulparirra_language \n", | |
"zeme1240 Q56373 Zeme_language \n", | |
"zeme1240 Q21491053 <NA> \n", | |
"zena1250 Q2293952 Zenati_languages \n", | |
"zena1250 Q2741732 Northern_Berber_languages \n", | |
"\n", | |
"[163 rows x 6 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df[df.index.duplicated(keep=KEEP_ALL)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "ad1703bd-0bc1-404c-9a03-24f3e316dcc4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Index: 110 entries, ainu1252 to zena1250\n", | |
"Data columns (total 6 columns):\n", | |
" # Column Non-Null Count Dtype \n", | |
"--- ------ -------------- ----- \n", | |
" 0 name 110 non-null string\n", | |
" 1 level 110 non-null string\n", | |
" 2 title 48 non-null string\n", | |
" 3 title_wd 98 non-null string\n", | |
" 4 qid 110 non-null string\n", | |
" 5 qid_wd 110 non-null string\n", | |
"dtypes: string(6)\n", | |
"memory usage: 49.1 KB\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>level</th>\n", | |
" <th>title</th>\n", | |
" <th>title_wd</th>\n", | |
" <th>qid</th>\n", | |
" <th>qid_wd</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>kumi1248</th>\n", | |
" <td>Tipai</td>\n", | |
" <td>language</td>\n", | |
" <td>Ipai_language</td>\n", | |
" <td>Tiipai_language</td>\n", | |
" <td>Q3027474</td>\n", | |
" <td>Q3027471</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kumi1248</th>\n", | |
" <td>Tipai</td>\n", | |
" <td>language</td>\n", | |
" <td>Ipai_language</td>\n", | |
" <td>Kumeyaay_language</td>\n", | |
" <td>Q3027474</td>\n", | |
" <td>Q4910139</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mose1249</th>\n", | |
" <td>Mosetén-Chimané</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Chimane_language</td>\n", | |
" <td>Q15548035</td>\n", | |
" <td>Q35950</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mose1249</th>\n", | |
" <td>Mosetén-Chimané</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q15548035</td>\n", | |
" <td>Q25395221</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name level title title_wd \\\n", | |
"glottocode \n", | |
"kumi1248 Tipai language Ipai_language Tiipai_language \n", | |
"kumi1248 Tipai language Ipai_language Kumeyaay_language \n", | |
"mose1249 Mosetén-Chimané language <NA> Chimane_language \n", | |
"mose1249 Mosetén-Chimané language <NA> <NA> \n", | |
"\n", | |
" qid qid_wd \n", | |
"glottocode \n", | |
"kumi1248 Q3027474 Q3027471 \n", | |
"kumi1248 Q3027474 Q4910139 \n", | |
"mose1249 Q15548035 Q35950 \n", | |
"mose1249 Q15548035 Q25395221 " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"mismatch = df.loc[df['qid'] != df['qid_wd'], ['name', 'level', 'title', 'title_wd', 'qid', 'qid_wd']]\n", | |
"\n", | |
"mismatch.info(memory_usage='deep')\n", | |
"mismatch[mismatch.index.duplicated(keep=KEEP_ALL)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "5103047c-6691-495d-a65f-b91982848f86", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>level</th>\n", | |
" <th>title</th>\n", | |
" <th>title_wd</th>\n", | |
" <th>qid</th>\n", | |
" <th>qid_wd</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>glottocode</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>ainu1252</th>\n", | |
" <td>Ainu</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Ainu_languages</td>\n", | |
" <td>Q50111972</td>\n", | |
" <td>Q27969</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>akua1239</th>\n", | |
" <td>Akuapem</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Twi</td>\n", | |
" <td>Akuapem_dialect</td>\n", | |
" <td>Q36850</td>\n", | |
" <td>Q31150449</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>amii1238</th>\n", | |
" <td>Ami</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q12626835</td>\n", | |
" <td>Q10408315</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>andr1246</th>\n", | |
" <td>Andro</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Andro_language</td>\n", | |
" <td>Q30301408</td>\n", | |
" <td>Q55603949</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>aoua1234</th>\n", | |
" <td>A'ou</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>A%27ou_language</td>\n", | |
" <td>Q17284871</td>\n", | |
" <td>Q16109994</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>araf1243</th>\n", | |
" <td>Arafundi</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Arafundi_languages</td>\n", | |
" <td>Q11170629</td>\n", | |
" <td>Q4783702</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>arit1239</th>\n", | |
" <td>Aritinngitigh</td>\n", | |
" <td>language</td>\n", | |
" <td>Arritinngithigh_language</td>\n", | |
" <td>Adithinngithigh_language</td>\n", | |
" <td>Q4796002</td>\n", | |
" <td>Q4683034</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>assy1241</th>\n", | |
" <td>Assyrian Neo-Aramaic</td>\n", | |
" <td>language</td>\n", | |
" <td>Suret_language</td>\n", | |
" <td>Ashurian_Aramaic</td>\n", | |
" <td>Q29440</td>\n", | |
" <td>Q24915992</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>atat1238</th>\n", | |
" <td>Atatláhuca Mixtec</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Atatl%C3%A1huca%E2%80%93San_Miguel_Mixtec</td>\n", | |
" <td>Q32093046</td>\n", | |
" <td>Q12953721</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>azte1234</th>\n", | |
" <td>Aztec</td>\n", | |
" <td>family</td>\n", | |
" <td>Nahuatl</td>\n", | |
" <td>Nahuan_languages</td>\n", | |
" <td>Q13300</td>\n", | |
" <td>Q11965602</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>bala1242</th>\n", | |
" <td>Bala (China)</td>\n", | |
" <td>dialect</td>\n", | |
" <td><NA></td>\n", | |
" <td>Bala_language_(China)</td>\n", | |
" <td>Q107342080</td>\n", | |
" <td>Q86730632</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>bari1298</th>\n", | |
" <td>Barikewa</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Omati_language</td>\n", | |
" <td>Q63214981</td>\n", | |
" <td>Q7089905</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>bata1301</th>\n", | |
" <td>Batak</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Batak_language_(Philippines)</td>\n", | |
" <td>Q50934420</td>\n", | |
" <td>Q3450443</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>bori1243</th>\n", | |
" <td>Bori-Karko</td>\n", | |
" <td>language</td>\n", | |
" <td>Adi_languages</td>\n", | |
" <td>Bori_language</td>\n", | |
" <td>Q56440</td>\n", | |
" <td>Q4945106</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>cent2226</th>\n", | |
" <td>Central Maipuran</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Paresi%E2%80%93Waura_languages</td>\n", | |
" <td>Q97959215</td>\n", | |
" <td>Q7136862</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>chon1248</th>\n", | |
" <td>Chono</td>\n", | |
" <td>language</td>\n", | |
" <td>Kakauhua_language</td>\n", | |
" <td>Chono_language</td>\n", | |
" <td>Q3507948</td>\n", | |
" <td>Q5104704</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>cuoi1242</th>\n", | |
" <td>Cuoi</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Cuoi_language</td>\n", | |
" <td>Q12629405</td>\n", | |
" <td>Q3380501</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>damu1236</th>\n", | |
" <td>Damu</td>\n", | |
" <td>language</td>\n", | |
" <td>Adi_languages</td>\n", | |
" <td>Damu_language</td>\n", | |
" <td>Q56440</td>\n", | |
" <td>Q17002115</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>dhan1265</th>\n", | |
" <td>Dewas-Done Danuwar</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Danwar_language</td>\n", | |
" <td>Q62663667</td>\n", | |
" <td>Q3522797</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>elal1235</th>\n", | |
" <td>El Alto Zapotec</td>\n", | |
" <td>language</td>\n", | |
" <td>Zoogocho_Zapotec</td>\n", | |
" <td>El_Alto_Zapotec</td>\n", | |
" <td>Q8074100</td>\n", | |
" <td>Q5350733</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>esto1258</th>\n", | |
" <td>Estonian</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Estonian_language</td>\n", | |
" <td>Q12361545</td>\n", | |
" <td>Q9072</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>garr1260</th>\n", | |
" <td>Garrwan</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Garawan_languages</td>\n", | |
" <td>Q12631364</td>\n", | |
" <td>Q5521951</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>hava1248</th>\n", | |
" <td>Havasupai-Walapai-Yavapai</td>\n", | |
" <td>language</td>\n", | |
" <td>Havasupai%E2%80%93Hualapai_language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q3565286</td>\n", | |
" <td>Q111366384</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>hwar1238</th>\n", | |
" <td>Qwara</td>\n", | |
" <td>dialect</td>\n", | |
" <td><NA></td>\n", | |
" <td>Qwara_dialect</td>\n", | |
" <td>Q53765647</td>\n", | |
" <td>Q56736</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>indo1316</th>\n", | |
" <td>Standard Indonesian</td>\n", | |
" <td>language</td>\n", | |
" <td>Indonesian_language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q9240</td>\n", | |
" <td>Q110620923</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kala1399</th>\n", | |
" <td>Kalaallisut</td>\n", | |
" <td>language</td>\n", | |
" <td>Greenlandic_language</td>\n", | |
" <td>West_Greenlandic</td>\n", | |
" <td>Q25355</td>\n", | |
" <td>Q15665351</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kalm1243</th>\n", | |
" <td>Oirad-Kalmyk-Darkhat</td>\n", | |
" <td>language</td>\n", | |
" <td>Kalmyk_Oirat</td>\n", | |
" <td>Oirat_language</td>\n", | |
" <td>Q33634</td>\n", | |
" <td>Q56959</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kati1270</th>\n", | |
" <td>Katë</td>\n", | |
" <td>language</td>\n", | |
" <td>Kata-vari_dialect</td>\n", | |
" <td>Kamkata-vari_language</td>\n", | |
" <td>Q3449784</td>\n", | |
" <td>Q2605045</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kaur1271</th>\n", | |
" <td>Kaure-Narau</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Kaure_language</td>\n", | |
" <td>Q12634336</td>\n", | |
" <td>Q20526532</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kawi1241</th>\n", | |
" <td>Kawi</td>\n", | |
" <td>language</td>\n", | |
" <td>Kawi_language</td>\n", | |
" <td>Old_Javanese</td>\n", | |
" <td>Q49341</td>\n", | |
" <td>Q49340</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kend1253</th>\n", | |
" <td>Kendeje</td>\n", | |
" <td>language</td>\n", | |
" <td>Teribe_language</td>\n", | |
" <td>Kendeje_language</td>\n", | |
" <td>Q36533</td>\n", | |
" <td>Q56895</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kulo1237</th>\n", | |
" <td>Kulon-Pazeh</td>\n", | |
" <td>language</td>\n", | |
" <td>Pazeh_language</td>\n", | |
" <td>Kulon_language</td>\n", | |
" <td>Q36435</td>\n", | |
" <td>Q11182000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kumi1248</th>\n", | |
" <td>Tipai</td>\n", | |
" <td>language</td>\n", | |
" <td>Ipai_language</td>\n", | |
" <td>Tiipai_language</td>\n", | |
" <td>Q3027474</td>\n", | |
" <td>Q3027471</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>kumi1248</th>\n", | |
" <td>Tipai</td>\n", | |
" <td>language</td>\n", | |
" <td>Ipai_language</td>\n", | |
" <td>Kumeyaay_language</td>\n", | |
" <td>Q3027474</td>\n", | |
" <td>Q4910139</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>lari1253</th>\n", | |
" <td>Larestani</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Achomi_language</td>\n", | |
" <td>Q33468</td>\n", | |
" <td>Q4699526</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>long1252</th>\n", | |
" <td>Longdu</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Zhongshan_Min</td>\n", | |
" <td>Longdu_dialect</td>\n", | |
" <td>Q8070958</td>\n", | |
" <td>Q6673704</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>loup1243</th>\n", | |
" <td>Loup A</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Loup_language</td>\n", | |
" <td>Q27921265</td>\n", | |
" <td>Q6689698</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>maha1308</th>\n", | |
" <td>Mahakam Kenyah</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q12953633</td>\n", | |
" <td>Q12953631</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mala1480</th>\n", | |
" <td>Malayic Dayak</td>\n", | |
" <td>language</td>\n", | |
" <td>Bamayo_language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q3514892</td>\n", | |
" <td>Q110162108</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mans1258</th>\n", | |
" <td>Northern Mansi</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Mansi_language</td>\n", | |
" <td>Q30304537</td>\n", | |
" <td>Q33759</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mark1255</th>\n", | |
" <td>Markweeta</td>\n", | |
" <td>language</td>\n", | |
" <td>Markwet_language</td>\n", | |
" <td>Nandi%E2%80%93Markweta_languages</td>\n", | |
" <td>Q56874</td>\n", | |
" <td>Q11028135</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>masa1299</th>\n", | |
" <td>Masaaba</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Masaba_language</td>\n", | |
" <td>Q12952814</td>\n", | |
" <td>Q3740241</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>masb1237</th>\n", | |
" <td>Masbate Sorsogon</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Sorsogon_language</td>\n", | |
" <td>Q16113356</td>\n", | |
" <td>Q7563749</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mato1250</th>\n", | |
" <td>Mator-Taigi-Karagas</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Mator_language</td>\n", | |
" <td>Q20669419</td>\n", | |
" <td>Q36453</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mixe1286</th>\n", | |
" <td>Mixe</td>\n", | |
" <td>family</td>\n", | |
" <td>Mixean_languages</td>\n", | |
" <td>Mixe_languages</td>\n", | |
" <td>Q36225</td>\n", | |
" <td>Q3833010</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mose1249</th>\n", | |
" <td>Mosetén-Chimané</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Chimane_language</td>\n", | |
" <td>Q15548035</td>\n", | |
" <td>Q35950</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mose1249</th>\n", | |
" <td>Mosetén-Chimané</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q15548035</td>\n", | |
" <td>Q25395221</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mosi1247</th>\n", | |
" <td>Akie</td>\n", | |
" <td>language</td>\n", | |
" <td>Nandi%E2%80%93Markweta_languages</td>\n", | |
" <td>Mosiro_language</td>\n", | |
" <td>Q11028135</td>\n", | |
" <td>Q6916288</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ncan1245</th>\n", | |
" <td>Ncane-Mungong</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Noni_language</td>\n", | |
" <td>Q11297920</td>\n", | |
" <td>Q36072</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ndyu1242</th>\n", | |
" <td>Aukan</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Ndyuka_language</td>\n", | |
" <td>Q2659044</td>\n", | |
" <td>Q35037</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>nisa1239</th>\n", | |
" <td>Nisa-Anasi</td>\n", | |
" <td>language</td>\n", | |
" <td>Nisa_language</td>\n", | |
" <td>Nisa-Anasi_language</td>\n", | |
" <td>Q13593518</td>\n", | |
" <td>Q4751795</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>noir1238</th>\n", | |
" <td>Noiri</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Bhilori_language</td>\n", | |
" <td>Q12953774</td>\n", | |
" <td>Q4901734</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>noma1263</th>\n", | |
" <td>Nomatsiguenga</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Nomatsiguenga_language</td>\n", | |
" <td>Q1995859</td>\n", | |
" <td>Q3342992</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>nort2930</th>\n", | |
" <td>Northeast Kiwai</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Kiwai_language</td>\n", | |
" <td>Q11732324</td>\n", | |
" <td>Q6418846</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>nort2937</th>\n", | |
" <td>Northern Hill/Valley Yokuts</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Northern_Valley_Yokuts</td>\n", | |
" <td>Kings_River_Yokuts</td>\n", | |
" <td>Q85789777</td>\n", | |
" <td>Q6413014</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>ocot1243</th>\n", | |
" <td>Ocotepec Mixtec</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>%C3%91um%C3%AD_Mixtec</td>\n", | |
" <td>Q25559575</td>\n", | |
" <td>Q8078669</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>otom1276</th>\n", | |
" <td>Otomaco-Taparita</td>\n", | |
" <td>family</td>\n", | |
" <td>Otomaco_language</td>\n", | |
" <td>Otom%C3%A1koan_languages</td>\n", | |
" <td>Q16879234</td>\n", | |
" <td>Q3217503</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>pale1264</th>\n", | |
" <td>Palembang</td>\n", | |
" <td>dialect</td>\n", | |
" <td><NA></td>\n", | |
" <td>Palembang_language</td>\n", | |
" <td>Q25559510</td>\n", | |
" <td>Q12497929</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>panj1256</th>\n", | |
" <td>Eastern Panjabi</td>\n", | |
" <td>language</td>\n", | |
" <td>Punjabi_language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q58635</td>\n", | |
" <td>Q28164079</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>peno1244</th>\n", | |
" <td>Peñoles Mixtec</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Estetla_Mixtec</td>\n", | |
" <td>Q42411307</td>\n", | |
" <td>Q5401071</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>poch1244</th>\n", | |
" <td>Pochutec</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Pochutec_language</td>\n", | |
" <td>Q42968898</td>\n", | |
" <td>Q2427341</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>puwa1234</th>\n", | |
" <td>Puwa Yi</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Phowa_language</td>\n", | |
" <td>Q25559431</td>\n", | |
" <td>Q7187959</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>rian1260</th>\n", | |
" <td>Riang</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Riang_language</td>\n", | |
" <td>Q42353409</td>\n", | |
" <td>Q2741615</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sana1295</th>\n", | |
" <td>Sanaani Arabic</td>\n", | |
" <td>language</td>\n", | |
" <td>Yemeni_Arabic</td>\n", | |
" <td>San%CA%BDani_Arabic</td>\n", | |
" <td>Q1686766</td>\n", | |
" <td>Q56578</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sanf1262</th>\n", | |
" <td>San Francisco Matlatzinca</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Matlatzinca_language</td>\n", | |
" <td>Q12953704</td>\n", | |
" <td>Q3832945</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sanj1285</th>\n", | |
" <td>San Juan Atzingo Popoloca</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Southern_Popoloca_language</td>\n", | |
" <td>Q12953819</td>\n", | |
" <td>Q7570327</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sanl1248</th>\n", | |
" <td>San Luís Temalacayuca Popoloca</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Northern_Popoloca_language</td>\n", | |
" <td>Q25559602</td>\n", | |
" <td>Q7058861</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sant1454</th>\n", | |
" <td>Santa Inés Ahuatempan Popoloca</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Western_Popoloca_language</td>\n", | |
" <td>Q42365276</td>\n", | |
" <td>Q7988174</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sate1242</th>\n", | |
" <td>Ems-Weser Frisian</td>\n", | |
" <td>language</td>\n", | |
" <td>Saterland_Frisian_language</td>\n", | |
" <td>East_Frisian_language</td>\n", | |
" <td>Q27154</td>\n", | |
" <td>Q494355</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>shap1240</th>\n", | |
" <td>Shapsug</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Shapsug_Adyghe_dialect</td>\n", | |
" <td>Kfar_Kama_Adyghe_dialect</td>\n", | |
" <td>Q12813044</td>\n", | |
" <td>Q6398657</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sini1245</th>\n", | |
" <td>Sinitic</td>\n", | |
" <td>family</td>\n", | |
" <td>Sinitic_languages</td>\n", | |
" <td>Chinese_language</td>\n", | |
" <td>Q33857</td>\n", | |
" <td>Q7850</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sira1267</th>\n", | |
" <td>Sirayaic</td>\n", | |
" <td>language</td>\n", | |
" <td>Sirayaic_languages</td>\n", | |
" <td>Siraya_language</td>\n", | |
" <td>Q55630686</td>\n", | |
" <td>Q716604</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sota1242</th>\n", | |
" <td>Sota Kanum</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Nggarna_language</td>\n", | |
" <td>Q12952568</td>\n", | |
" <td>Q85788907</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>soul1243</th>\n", | |
" <td>Souletin Basque</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Souletin_dialect</td>\n", | |
" <td>Q12953385</td>\n", | |
" <td>Q2746856</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout2668</th>\n", | |
" <td>Southern Hindko</td>\n", | |
" <td>language</td>\n", | |
" <td>Hindko</td>\n", | |
" <td><NA></td>\n", | |
" <td>Q382273</td>\n", | |
" <td>Q111326242</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout2679</th>\n", | |
" <td>South Estonian</td>\n", | |
" <td>language</td>\n", | |
" <td>V%C3%B5ro_language</td>\n", | |
" <td>South_Estonian</td>\n", | |
" <td>Q32762</td>\n", | |
" <td>Q13295</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout2687</th>\n", | |
" <td>Southern Vietnamese</td>\n", | |
" <td>dialect</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q55856412</td>\n", | |
" <td>Q10806348</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout2965</th>\n", | |
" <td>Southern Puget Sound Salish</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Whulshootseed_dialect</td>\n", | |
" <td>Q12642471</td>\n", | |
" <td>Q7997684</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout2978</th>\n", | |
" <td>Southern East Cree</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>East_Cree</td>\n", | |
" <td>Q12953464</td>\n", | |
" <td>Q282011</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout2990</th>\n", | |
" <td>Southern Pastaza Quechua</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Lowland_Peruvian_Quechua</td>\n", | |
" <td>Q25559692</td>\n", | |
" <td>Q6694075</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>sout3212</th>\n", | |
" <td>Southeastern Ngwi</td>\n", | |
" <td>family</td>\n", | |
" <td>Nisoish_languages</td>\n", | |
" <td>Southeastern_Loloish_languages</td>\n", | |
" <td>Q56990</td>\n", | |
" <td>Q16111894</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>taid1248</th>\n", | |
" <td>Tai Do-Mene-Yo</td>\n", | |
" <td>language</td>\n", | |
" <td>Tai_Yo_language</td>\n", | |
" <td>Tai_Do_language</td>\n", | |
" <td>Q7675790</td>\n", | |
" <td>Q7675746</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>taih1245</th>\n", | |
" <td>Tai Pao (Retired)</td>\n", | |
" <td>language</td>\n", | |
" <td>Tai_Pao_language</td>\n", | |
" <td>Tai_Hang_Tong_language</td>\n", | |
" <td>Q7675795</td>\n", | |
" <td>Q7675753</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tall1235</th>\n", | |
" <td>Tallán</td>\n", | |
" <td>language</td>\n", | |
" <td>Tall%C3%A1n_language</td>\n", | |
" <td>Catacaoan_languages</td>\n", | |
" <td>Q16910468</td>\n", | |
" <td>Q5051139</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>talu1238</th>\n", | |
" <td>Lavu-Yongsheng-Talu</td>\n", | |
" <td>language</td>\n", | |
" <td>Talu_language</td>\n", | |
" <td>Lavu_language</td>\n", | |
" <td>Q48769531</td>\n", | |
" <td>Q16999095</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tata1257</th>\n", | |
" <td>Tatana</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Sabah_Bisaya_language</td>\n", | |
" <td>Q18643518</td>\n", | |
" <td>Q7395820</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>temb1272</th>\n", | |
" <td>Motembo-Kunda</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Budza_language</td>\n", | |
" <td>Q11013108</td>\n", | |
" <td>Q3046889</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>temb1276</th>\n", | |
" <td>Tenetehara</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Tenetehara_language</td>\n", | |
" <td>Q10322157</td>\n", | |
" <td>Q7699720</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>temn1245</th>\n", | |
" <td>Northern Mel</td>\n", | |
" <td>family</td>\n", | |
" <td><NA></td>\n", | |
" <td>Baga_language</td>\n", | |
" <td>Q16114535</td>\n", | |
" <td>Q35005</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tibe1272</th>\n", | |
" <td>Tibetan</td>\n", | |
" <td>language</td>\n", | |
" <td>Lhasa_Tibetan</td>\n", | |
" <td>Central_Tibetan</td>\n", | |
" <td>Q34271</td>\n", | |
" <td>Q5061915</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>timo1237</th>\n", | |
" <td>Timote-Cuica</td>\n", | |
" <td>language</td>\n", | |
" <td>Timote_language</td>\n", | |
" <td>Timotean_languages</td>\n", | |
" <td>Q7806995</td>\n", | |
" <td>Q3217540</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tsis1238</th>\n", | |
" <td>Salka-Tsishingini</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Shingini_language</td>\n", | |
" <td>Q13123571</td>\n", | |
" <td>Q35199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tund1255</th>\n", | |
" <td>Eastern Tundra Nenets</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Yurats_language</td>\n", | |
" <td>Tundra_Nenets_language</td>\n", | |
" <td>Q34252</td>\n", | |
" <td>Q1564258</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>uain1239</th>\n", | |
" <td>Uainuma-Mariate</td>\n", | |
" <td>language</td>\n", | |
" <td>Wainum%C3%A1-Mariat%C3%A9_language</td>\n", | |
" <td>Mariat%C3%A9_language</td>\n", | |
" <td>Q16910017</td>\n", | |
" <td>Q6762506</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>vase1234</th>\n", | |
" <td>Northern Ju</td>\n", | |
" <td>language</td>\n", | |
" <td>Sekele_language</td>\n", | |
" <td>!O!ung_language</td>\n", | |
" <td>Q56528</td>\n", | |
" <td>Q3832974</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>waga1262</th>\n", | |
" <td>Wagawaga (Retired)</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Wagawaga_language_(New_Guinea)</td>\n", | |
" <td>Q16112427</td>\n", | |
" <td>Q7959485</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>west2340</th>\n", | |
" <td>Western Aragonese</td>\n", | |
" <td>dialect</td>\n", | |
" <td>Ans%C3%B3_Aragonese</td>\n", | |
" <td>Western_Aragonese</td>\n", | |
" <td>Q3574358</td>\n", | |
" <td>Q3574028</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>west2488</th>\n", | |
" <td>Western Krahn</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Western_Krahn_language</td>\n", | |
" <td>Q35809</td>\n", | |
" <td>Q10975611</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>wudi1238</th>\n", | |
" <td>Wuding-Luquan Yi</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Nasu_language</td>\n", | |
" <td>Q25559456</td>\n", | |
" <td>Q56403</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>wyan1247</th>\n", | |
" <td>Huron-Wyandot</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Wyandot_language</td>\n", | |
" <td>Q3567223</td>\n", | |
" <td>Q1185119</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>xian1249</th>\n", | |
" <td>Xiandao</td>\n", | |
" <td>dialect</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q12953305</td>\n", | |
" <td>Q10884275</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>xinc1246</th>\n", | |
" <td>Xinca-Guazacapan</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Guazacap%C3%A1n_language</td>\n", | |
" <td>Q53428794</td>\n", | |
" <td>Q19572028</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yare1249</th>\n", | |
" <td>Yareni Zapotec</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Ixtl%C3%A1n_Zapotec</td>\n", | |
" <td>Q12645368</td>\n", | |
" <td>Q6101185</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yela1238</th>\n", | |
" <td>Yela-Kela</td>\n", | |
" <td>language</td>\n", | |
" <td>Yela-Kela_language</td>\n", | |
" <td>Yela_language</td>\n", | |
" <td>Q32151338</td>\n", | |
" <td>Q8051428</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yout1234</th>\n", | |
" <td>Yout Wam</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q63341264</td>\n", | |
" <td>Q31819036</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yuga1244</th>\n", | |
" <td>Yugambal</td>\n", | |
" <td>language</td>\n", | |
" <td>Yugambeh_language</td>\n", | |
" <td>Yugambal_language</td>\n", | |
" <td>Q16334334</td>\n", | |
" <td>Q3446663</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>yulp1239</th>\n", | |
" <td>Yulparija</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Yulparirra_language</td>\n", | |
" <td>Q17319895</td>\n", | |
" <td>Q106554801</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zaca1241</th>\n", | |
" <td>Zacatlán-Ahuacatlán-Tepetzintla Nahuatl</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td><NA></td>\n", | |
" <td>Q2514044</td>\n", | |
" <td>Q2204061</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zeme1240</th>\n", | |
" <td>Zeme Naga</td>\n", | |
" <td>language</td>\n", | |
" <td><NA></td>\n", | |
" <td>Zeme_language</td>\n", | |
" <td>Q21491053</td>\n", | |
" <td>Q56373</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>zena1250</th>\n", | |
" <td>Zenatic</td>\n", | |
" <td>family</td>\n", | |
" <td>Northern_Berber_languages</td>\n", | |
" <td>Zenati_languages</td>\n", | |
" <td>Q2741732</td>\n", | |
" <td>Q2293952</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name level \\\n", | |
"glottocode \n", | |
"ainu1252 Ainu family \n", | |
"akua1239 Akuapem dialect \n", | |
"amii1238 Ami language \n", | |
"andr1246 Andro language \n", | |
"aoua1234 A'ou language \n", | |
"araf1243 Arafundi family \n", | |
"arit1239 Aritinngitigh language \n", | |
"assy1241 Assyrian Neo-Aramaic language \n", | |
"atat1238 Atatláhuca Mixtec language \n", | |
"azte1234 Aztec family \n", | |
"bala1242 Bala (China) dialect \n", | |
"bari1298 Barikewa language \n", | |
"bata1301 Batak language \n", | |
"bori1243 Bori-Karko language \n", | |
"cent2226 Central Maipuran family \n", | |
"chon1248 Chono language \n", | |
"cuoi1242 Cuoi family \n", | |
"damu1236 Damu language \n", | |
"dhan1265 Dewas-Done Danuwar language \n", | |
"elal1235 El Alto Zapotec language \n", | |
"esto1258 Estonian language \n", | |
"garr1260 Garrwan family \n", | |
"hava1248 Havasupai-Walapai-Yavapai language \n", | |
"hwar1238 Qwara dialect \n", | |
"indo1316 Standard Indonesian language \n", | |
"kala1399 Kalaallisut language \n", | |
"kalm1243 Oirad-Kalmyk-Darkhat language \n", | |
"kati1270 Katë language \n", | |
"kaur1271 Kaure-Narau language \n", | |
"kawi1241 Kawi language \n", | |
"kend1253 Kendeje language \n", | |
"kulo1237 Kulon-Pazeh language \n", | |
"kumi1248 Tipai language \n", | |
"kumi1248 Tipai language \n", | |
"lari1253 Larestani language \n", | |
"long1252 Longdu dialect \n", | |
"loup1243 Loup A language \n", | |
"maha1308 Mahakam Kenyah language \n", | |
"mala1480 Malayic Dayak language \n", | |
"mans1258 Northern Mansi language \n", | |
"mark1255 Markweeta language \n", | |
"masa1299 Masaaba language \n", | |
"masb1237 Masbate Sorsogon language \n", | |
"mato1250 Mator-Taigi-Karagas language \n", | |
"mixe1286 Mixe family \n", | |
"mose1249 Mosetén-Chimané language \n", | |
"mose1249 Mosetén-Chimané language \n", | |
"mosi1247 Akie language \n", | |
"ncan1245 Ncane-Mungong language \n", | |
"ndyu1242 Aukan language \n", | |
"nisa1239 Nisa-Anasi language \n", | |
"noir1238 Noiri language \n", | |
"noma1263 Nomatsiguenga language \n", | |
"nort2930 Northeast Kiwai language \n", | |
"nort2937 Northern Hill/Valley Yokuts dialect \n", | |
"ocot1243 Ocotepec Mixtec language \n", | |
"otom1276 Otomaco-Taparita family \n", | |
"pale1264 Palembang dialect \n", | |
"panj1256 Eastern Panjabi language \n", | |
"peno1244 Peñoles Mixtec language \n", | |
"poch1244 Pochutec language \n", | |
"puwa1234 Puwa Yi language \n", | |
"rian1260 Riang family \n", | |
"sana1295 Sanaani Arabic language \n", | |
"sanf1262 San Francisco Matlatzinca language \n", | |
"sanj1285 San Juan Atzingo Popoloca language \n", | |
"sanl1248 San Luís Temalacayuca Popoloca language \n", | |
"sant1454 Santa Inés Ahuatempan Popoloca language \n", | |
"sate1242 Ems-Weser Frisian language \n", | |
"shap1240 Shapsug dialect \n", | |
"sini1245 Sinitic family \n", | |
"sira1267 Sirayaic language \n", | |
"sota1242 Sota Kanum language \n", | |
"soul1243 Souletin Basque language \n", | |
"sout2668 Southern Hindko language \n", | |
"sout2679 South Estonian language \n", | |
"sout2687 Southern Vietnamese dialect \n", | |
"sout2965 Southern Puget Sound Salish language \n", | |
"sout2978 Southern East Cree language \n", | |
"sout2990 Southern Pastaza Quechua language \n", | |
"sout3212 Southeastern Ngwi family \n", | |
"taid1248 Tai Do-Mene-Yo language \n", | |
"taih1245 Tai Pao (Retired) language \n", | |
"tall1235 Tallán language \n", | |
"talu1238 Lavu-Yongsheng-Talu language \n", | |
"tata1257 Tatana language \n", | |
"temb1272 Motembo-Kunda language \n", | |
"temb1276 Tenetehara language \n", | |
"temn1245 Northern Mel family \n", | |
"tibe1272 Tibetan language \n", | |
"timo1237 Timote-Cuica language \n", | |
"tsis1238 Salka-Tsishingini language \n", | |
"tund1255 Eastern Tundra Nenets dialect \n", | |
"uain1239 Uainuma-Mariate language \n", | |
"vase1234 Northern Ju language \n", | |
"waga1262 Wagawaga (Retired) language \n", | |
"west2340 Western Aragonese dialect \n", | |
"west2488 Western Krahn language \n", | |
"wudi1238 Wuding-Luquan Yi language \n", | |
"wyan1247 Huron-Wyandot language \n", | |
"xian1249 Xiandao dialect \n", | |
"xinc1246 Xinca-Guazacapan language \n", | |
"yare1249 Yareni Zapotec language \n", | |
"yela1238 Yela-Kela language \n", | |
"yout1234 Yout Wam language \n", | |
"yuga1244 Yugambal language \n", | |
"yulp1239 Yulparija language \n", | |
"zaca1241 Zacatlán-Ahuacatlán-Tepetzintla Nahuatl language \n", | |
"zeme1240 Zeme Naga language \n", | |
"zena1250 Zenatic family \n", | |
"\n", | |
" title \\\n", | |
"glottocode \n", | |
"ainu1252 <NA> \n", | |
"akua1239 Twi \n", | |
"amii1238 <NA> \n", | |
"andr1246 <NA> \n", | |
"aoua1234 <NA> \n", | |
"araf1243 <NA> \n", | |
"arit1239 Arritinngithigh_language \n", | |
"assy1241 Suret_language \n", | |
"atat1238 <NA> \n", | |
"azte1234 Nahuatl \n", | |
"bala1242 <NA> \n", | |
"bari1298 <NA> \n", | |
"bata1301 <NA> \n", | |
"bori1243 Adi_languages \n", | |
"cent2226 <NA> \n", | |
"chon1248 Kakauhua_language \n", | |
"cuoi1242 <NA> \n", | |
"damu1236 Adi_languages \n", | |
"dhan1265 <NA> \n", | |
"elal1235 Zoogocho_Zapotec \n", | |
"esto1258 <NA> \n", | |
"garr1260 <NA> \n", | |
"hava1248 Havasupai%E2%80%93Hualapai_language \n", | |
"hwar1238 <NA> \n", | |
"indo1316 Indonesian_language \n", | |
"kala1399 Greenlandic_language \n", | |
"kalm1243 Kalmyk_Oirat \n", | |
"kati1270 Kata-vari_dialect \n", | |
"kaur1271 <NA> \n", | |
"kawi1241 Kawi_language \n", | |
"kend1253 Teribe_language \n", | |
"kulo1237 Pazeh_language \n", | |
"kumi1248 Ipai_language \n", | |
"kumi1248 Ipai_language \n", | |
"lari1253 <NA> \n", | |
"long1252 Zhongshan_Min \n", | |
"loup1243 <NA> \n", | |
"maha1308 <NA> \n", | |
"mala1480 Bamayo_language \n", | |
"mans1258 <NA> \n", | |
"mark1255 Markwet_language \n", | |
"masa1299 <NA> \n", | |
"masb1237 <NA> \n", | |
"mato1250 <NA> \n", | |
"mixe1286 Mixean_languages \n", | |
"mose1249 <NA> \n", | |
"mose1249 <NA> \n", | |
"mosi1247 Nandi%E2%80%93Markweta_languages \n", | |
"ncan1245 <NA> \n", | |
"ndyu1242 <NA> \n", | |
"nisa1239 Nisa_language \n", | |
"noir1238 <NA> \n", | |
"noma1263 <NA> \n", | |
"nort2930 <NA> \n", | |
"nort2937 Northern_Valley_Yokuts \n", | |
"ocot1243 <NA> \n", | |
"otom1276 Otomaco_language \n", | |
"pale1264 <NA> \n", | |
"panj1256 Punjabi_language \n", | |
"peno1244 <NA> \n", | |
"poch1244 <NA> \n", | |
"puwa1234 <NA> \n", | |
"rian1260 <NA> \n", | |
"sana1295 Yemeni_Arabic \n", | |
"sanf1262 <NA> \n", | |
"sanj1285 <NA> \n", | |
"sanl1248 <NA> \n", | |
"sant1454 <NA> \n", | |
"sate1242 Saterland_Frisian_language \n", | |
"shap1240 Shapsug_Adyghe_dialect \n", | |
"sini1245 Sinitic_languages \n", | |
"sira1267 Sirayaic_languages \n", | |
"sota1242 <NA> \n", | |
"soul1243 <NA> \n", | |
"sout2668 Hindko \n", | |
"sout2679 V%C3%B5ro_language \n", | |
"sout2687 <NA> \n", | |
"sout2965 <NA> \n", | |
"sout2978 <NA> \n", | |
"sout2990 <NA> \n", | |
"sout3212 Nisoish_languages \n", | |
"taid1248 Tai_Yo_language \n", | |
"taih1245 Tai_Pao_language \n", | |
"tall1235 Tall%C3%A1n_language \n", | |
"talu1238 Talu_language \n", | |
"tata1257 <NA> \n", | |
"temb1272 <NA> \n", | |
"temb1276 <NA> \n", | |
"temn1245 <NA> \n", | |
"tibe1272 Lhasa_Tibetan \n", | |
"timo1237 Timote_language \n", | |
"tsis1238 <NA> \n", | |
"tund1255 Yurats_language \n", | |
"uain1239 Wainum%C3%A1-Mariat%C3%A9_language \n", | |
"vase1234 Sekele_language \n", | |
"waga1262 <NA> \n", | |
"west2340 Ans%C3%B3_Aragonese \n", | |
"west2488 <NA> \n", | |
"wudi1238 <NA> \n", | |
"wyan1247 <NA> \n", | |
"xian1249 <NA> \n", | |
"xinc1246 <NA> \n", | |
"yare1249 <NA> \n", | |
"yela1238 Yela-Kela_language \n", | |
"yout1234 <NA> \n", | |
"yuga1244 Yugambeh_language \n", | |
"yulp1239 <NA> \n", | |
"zaca1241 <NA> \n", | |
"zeme1240 <NA> \n", | |
"zena1250 Northern_Berber_languages \n", | |
"\n", | |
" title_wd qid qid_wd \n", | |
"glottocode \n", | |
"ainu1252 Ainu_languages Q50111972 Q27969 \n", | |
"akua1239 Akuapem_dialect Q36850 Q31150449 \n", | |
"amii1238 <NA> Q12626835 Q10408315 \n", | |
"andr1246 Andro_language Q30301408 Q55603949 \n", | |
"aoua1234 A%27ou_language Q17284871 Q16109994 \n", | |
"araf1243 Arafundi_languages Q11170629 Q4783702 \n", | |
"arit1239 Adithinngithigh_language Q4796002 Q4683034 \n", | |
"assy1241 Ashurian_Aramaic Q29440 Q24915992 \n", | |
"atat1238 Atatl%C3%A1huca%E2%80%93San_Miguel_Mixtec Q32093046 Q12953721 \n", | |
"azte1234 Nahuan_languages Q13300 Q11965602 \n", | |
"bala1242 Bala_language_(China) Q107342080 Q86730632 \n", | |
"bari1298 Omati_language Q63214981 Q7089905 \n", | |
"bata1301 Batak_language_(Philippines) Q50934420 Q3450443 \n", | |
"bori1243 Bori_language Q56440 Q4945106 \n", | |
"cent2226 Paresi%E2%80%93Waura_languages Q97959215 Q7136862 \n", | |
"chon1248 Chono_language Q3507948 Q5104704 \n", | |
"cuoi1242 Cuoi_language Q12629405 Q3380501 \n", | |
"damu1236 Damu_language Q56440 Q17002115 \n", | |
"dhan1265 Danwar_language Q62663667 Q3522797 \n", | |
"elal1235 El_Alto_Zapotec Q8074100 Q5350733 \n", | |
"esto1258 Estonian_language Q12361545 Q9072 \n", | |
"garr1260 Garawan_languages Q12631364 Q5521951 \n", | |
"hava1248 <NA> Q3565286 Q111366384 \n", | |
"hwar1238 Qwara_dialect Q53765647 Q56736 \n", | |
"indo1316 <NA> Q9240 Q110620923 \n", | |
"kala1399 West_Greenlandic Q25355 Q15665351 \n", | |
"kalm1243 Oirat_language Q33634 Q56959 \n", | |
"kati1270 Kamkata-vari_language Q3449784 Q2605045 \n", | |
"kaur1271 Kaure_language Q12634336 Q20526532 \n", | |
"kawi1241 Old_Javanese Q49341 Q49340 \n", | |
"kend1253 Kendeje_language Q36533 Q56895 \n", | |
"kulo1237 Kulon_language Q36435 Q11182000 \n", | |
"kumi1248 Tiipai_language Q3027474 Q3027471 \n", | |
"kumi1248 Kumeyaay_language Q3027474 Q4910139 \n", | |
"lari1253 Achomi_language Q33468 Q4699526 \n", | |
"long1252 Longdu_dialect Q8070958 Q6673704 \n", | |
"loup1243 Loup_language Q27921265 Q6689698 \n", | |
"maha1308 <NA> Q12953633 Q12953631 \n", | |
"mala1480 <NA> Q3514892 Q110162108 \n", | |
"mans1258 Mansi_language Q30304537 Q33759 \n", | |
"mark1255 Nandi%E2%80%93Markweta_languages Q56874 Q11028135 \n", | |
"masa1299 Masaba_language Q12952814 Q3740241 \n", | |
"masb1237 Sorsogon_language Q16113356 Q7563749 \n", | |
"mato1250 Mator_language Q20669419 Q36453 \n", | |
"mixe1286 Mixe_languages Q36225 Q3833010 \n", | |
"mose1249 Chimane_language Q15548035 Q35950 \n", | |
"mose1249 <NA> Q15548035 Q25395221 \n", | |
"mosi1247 Mosiro_language Q11028135 Q6916288 \n", | |
"ncan1245 Noni_language Q11297920 Q36072 \n", | |
"ndyu1242 Ndyuka_language Q2659044 Q35037 \n", | |
"nisa1239 Nisa-Anasi_language Q13593518 Q4751795 \n", | |
"noir1238 Bhilori_language Q12953774 Q4901734 \n", | |
"noma1263 Nomatsiguenga_language Q1995859 Q3342992 \n", | |
"nort2930 Kiwai_language Q11732324 Q6418846 \n", | |
"nort2937 Kings_River_Yokuts Q85789777 Q6413014 \n", | |
"ocot1243 %C3%91um%C3%AD_Mixtec Q25559575 Q8078669 \n", | |
"otom1276 Otom%C3%A1koan_languages Q16879234 Q3217503 \n", | |
"pale1264 Palembang_language Q25559510 Q12497929 \n", | |
"panj1256 <NA> Q58635 Q28164079 \n", | |
"peno1244 Estetla_Mixtec Q42411307 Q5401071 \n", | |
"poch1244 Pochutec_language Q42968898 Q2427341 \n", | |
"puwa1234 Phowa_language Q25559431 Q7187959 \n", | |
"rian1260 Riang_language Q42353409 Q2741615 \n", | |
"sana1295 San%CA%BDani_Arabic Q1686766 Q56578 \n", | |
"sanf1262 Matlatzinca_language Q12953704 Q3832945 \n", | |
"sanj1285 Southern_Popoloca_language Q12953819 Q7570327 \n", | |
"sanl1248 Northern_Popoloca_language Q25559602 Q7058861 \n", | |
"sant1454 Western_Popoloca_language Q42365276 Q7988174 \n", | |
"sate1242 East_Frisian_language Q27154 Q494355 \n", | |
"shap1240 Kfar_Kama_Adyghe_dialect Q12813044 Q6398657 \n", | |
"sini1245 Chinese_language Q33857 Q7850 \n", | |
"sira1267 Siraya_language Q55630686 Q716604 \n", | |
"sota1242 Nggarna_language Q12952568 Q85788907 \n", | |
"soul1243 Souletin_dialect Q12953385 Q2746856 \n", | |
"sout2668 <NA> Q382273 Q111326242 \n", | |
"sout2679 South_Estonian Q32762 Q13295 \n", | |
"sout2687 <NA> Q55856412 Q10806348 \n", | |
"sout2965 Whulshootseed_dialect Q12642471 Q7997684 \n", | |
"sout2978 East_Cree Q12953464 Q282011 \n", | |
"sout2990 Lowland_Peruvian_Quechua Q25559692 Q6694075 \n", | |
"sout3212 Southeastern_Loloish_languages Q56990 Q16111894 \n", | |
"taid1248 Tai_Do_language Q7675790 Q7675746 \n", | |
"taih1245 Tai_Hang_Tong_language Q7675795 Q7675753 \n", | |
"tall1235 Catacaoan_languages Q16910468 Q5051139 \n", | |
"talu1238 Lavu_language Q48769531 Q16999095 \n", | |
"tata1257 Sabah_Bisaya_language Q18643518 Q7395820 \n", | |
"temb1272 Budza_language Q11013108 Q3046889 \n", | |
"temb1276 Tenetehara_language Q10322157 Q7699720 \n", | |
"temn1245 Baga_language Q16114535 Q35005 \n", | |
"tibe1272 Central_Tibetan Q34271 Q5061915 \n", | |
"timo1237 Timotean_languages Q7806995 Q3217540 \n", | |
"tsis1238 Shingini_language Q13123571 Q35199 \n", | |
"tund1255 Tundra_Nenets_language Q34252 Q1564258 \n", | |
"uain1239 Mariat%C3%A9_language Q16910017 Q6762506 \n", | |
"vase1234 !O!ung_language Q56528 Q3832974 \n", | |
"waga1262 Wagawaga_language_(New_Guinea) Q16112427 Q7959485 \n", | |
"west2340 Western_Aragonese Q3574358 Q3574028 \n", | |
"west2488 Western_Krahn_language Q35809 Q10975611 \n", | |
"wudi1238 Nasu_language Q25559456 Q56403 \n", | |
"wyan1247 Wyandot_language Q3567223 Q1185119 \n", | |
"xian1249 <NA> Q12953305 Q10884275 \n", | |
"xinc1246 Guazacap%C3%A1n_language Q53428794 Q19572028 \n", | |
"yare1249 Ixtl%C3%A1n_Zapotec Q12645368 Q6101185 \n", | |
"yela1238 Yela_language Q32151338 Q8051428 \n", | |
"yout1234 <NA> Q63341264 Q31819036 \n", | |
"yuga1244 Yugambal_language Q16334334 Q3446663 \n", | |
"yulp1239 Yulparirra_language Q17319895 Q106554801 \n", | |
"zaca1241 <NA> Q2514044 Q2204061 \n", | |
"zeme1240 Zeme_language Q21491053 Q56373 \n", | |
"zena1250 Zenati_languages Q2741732 Q2293952 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"with pd.option_context('display.max_rows', 150):\n", | |
" display(mismatch)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment