Created
September 5, 2018 17:56
-
-
Save macleginn/51edac30a5f0cb82116d2e5f0a45fa85 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyglottolog.api import Glottolog | |
# 'full' is a pandas dataframe with glottocodes (it must already be defined
# before this script runs).
#
# The script looks up every distinct glottocode of 'full' in a local Glottolog
# checkout, writes the language name, family, coordinates and macroarea back
# into 'full', and saves the result as a tab-separated file.
api = Glottolog('/Users/macbook/tmp/glottolog')

GLOTTOCODE_COLUMN = 'Borrowing language (glottocode)'

# Column of 'full' to fill -> key of the per-language record in gltc_temp.
OUTPUT_COLUMNS = {
    'Borrowing language (name)': 'name',
    'Borrowing language family': 'family',
    'Borrowing language latitude': 'lat',
    'Borrowing language longitude': 'lon',
    'Macroregion': 'macroarea',
}

gltc_temp = {}    # glottocode -> metadata record, for codes that were resolved
gltc_err = set()  # glottocodes for which api.languoid() returned None

# Pass 1: query Glottolog once per distinct glottocode. unique() keeps the
# order of first appearance, so the 'Check gltc' messages come out in row order.
for gltc in full[GLOTTOCODE_COLUMN].unique():
    if not isinstance(gltc, str):
        # Missing cells (e.g. NaN) carry no glottocode to look up.
        continue

    lang = api.languoid(gltc)
    if lang is None:
        gltc_err.add(gltc)
        print('Check gltc: %s' % gltc)
        continue

    # Macroarea and family are best-effort: any failure to read them falls
    # back to an empty string. 'except Exception' keeps that fallback while
    # no longer swallowing KeyboardInterrupt / SystemExit.
    try:
        macroarea = lang.macroareas[0].name
    except Exception:
        macroarea = ''
    try:
        family = lang.family.name
    except Exception:
        family = ''

    gltc_temp[gltc] = {
        'name': lang.name,
        'family': family,
        'lat': lang.latitude,
        'lon': lang.longitude,
        'macroarea': macroarea,
    }

# Pass 2: write the metadata back into 'full'. The assignment goes through
# full.loc on purpose: assigning into a row extracted with full.iloc[i, :]
# modifies a separate Series and leaves 'full' unchanged.
codes = full[GLOTTOCODE_COLUMN]
# Rows that are skipped (unresolved code, code shorter than 3 characters,
# or a non-string cell) keep whatever they already contain.
resolved = [
    isinstance(code, str) and len(code) >= 3 and code in gltc_temp
    for code in codes
]
if any(resolved):
    resolved_codes = [code for code, ok in zip(codes, resolved) if ok]
    for column, key in OUTPUT_COLUMNS.items():
        # A plain list is assigned positionally to the selected rows, so the
        # result does not depend on the index labels of 'full'.
        full.loc[resolved, column] = [gltc_temp[code][key] for code in resolved_codes]

full.to_csv('full_df.csv', sep='\t', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment