This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import os.path | |
import sqlite3 | |
from bs4 import BeautifulSoup | |
from sys import exit | |
WORKING_DIR = 'XXX' | |
INPUT_DIR = 'input_html' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
proto.data <- read.csv('bdproto.csv', | |
sep = ',') | |
## Clean the data | |
p.d <- proto.data[ !is.na(proto.data$LanguageFamilyRoot) & | |
proto.data$LanguageFamilyRoot != '' & | |
!is.na(proto.data$LanguageName) & | |
proto.data$LanguageName != '', ] | |
## People mostly reconstruct weird stuff: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyglottolog.api import Glottolog | |
# 'full' is a pandas dataframe with glottocodes | |
api = Glottolog('/Users/macbook/tmp/glottolog') | |
gltc_temp = {} | |
gltc_err = set() | |
for i in range(full.shape[0]): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<meta charset="utf8"> | |
<title>Crop’n’display demo</title> | |
<style> | |
.overlay { | |
padding: 30px; | |
width: 100vw; | |
height: 100vh; | |
position: absolute; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DataFrames; | |
using Feather; | |
# Заранее подготовленная таблица расстояний между этносами | |
dist_data = Feather.read("geodistances.feather"); | |
@everywhere dist_array = Array{Int64}(926,926); | |
for i = 1:926 | |
for j = 2:927 | |
dist_array[i,j-1] = dist_data[i,j] | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
a | |
ab | |
abisses | |
abruptas | |
absistam | |
abstulit | |
ac | |
accendet | |
accepit | |
accepta |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import re | |
from functools import reduce | |
def process_phoneme(p): | |
"""Normalise phonetic symbols and enforce pharyngealised treatment of emphatics.""" | |
p = p.split('/')[0].replace(':', 'ː').replace('\u0361', '').replace('ˠ', 'ˤ').replace('\u033b', '').replace("'", 'ʰ').replace('\u032a', '') | |
if 'l' not in p and '\u0334' in p: | |
p = p.replace('\u0334', 'ˤ') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import re | |
# Convert to html using pandoc and capture output | |
fn = 'sources/re_docx/reflexive_letuchiy_20141102_nst_site.docx' | |
txt = subprocess.check_output(['pandoc', | |
'-f', 'docx', | |
'-t', 'html', | |
fn]).decode('utf8') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Extract selected sound (time from 0) | |
endeditor | |
duration = Get total duration | |
writeInfoLine: fixed$ (duration, 3) | |
# Replace 5000 with 5500 for female voice. | |
# You may need to tweak the number of formants (2nd parameter) based on | |
# your data. Sometimes, if two formants are close to each other, | |
# it is necessary to ask for 6 formants so that they may be decoupled. | |
# In other cases, however, this may introduce spurious formants, and | |
# it is always advisable to check both the spectrogram and |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Extract selected sound (time from 0) | |
endeditor | |
duration = Get total duration | |
writeInfoLine: fixed$ (duration, 3) | |
# Replace 5000 with 5500 for female voice. | |
# You may need to tweak the number of formants (2nd parameter) based on | |
# your data. Sometimes, if two formants are close to each other, | |
# it is necessary to ask for 6 formants so that they may be decoupled. | |
# In other cases, however, this may introduce spurious formants, and | |
# it is always advisable to check the spectrogram. |