This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyglottolog.api import Glottolog | |
# 'full' is a pandas dataframe with glottocodes | |
api = Glottolog('/Users/macbook/tmp/glottolog') | |
gltc_temp = {} | |
gltc_err = set() | |
for i in range(full.shape[0]): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
proto.data <- read.csv('bdproto.csv', | |
sep = ',') | |
## Clean the data | |
p.d <- proto.data[ !is.na(proto.data$LanguageFamilyRoot) & | |
proto.data$LanguageFamilyRoot != '' & | |
!is.na(proto.data$LanguageName) & | |
proto.data$LanguageName != '', ] | |
## People mostly reconstruct weird stuff: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import os.path | |
import sqlite3 | |
from bs4 import BeautifulSoup | |
from sys import exit | |
WORKING_DIR = 'XXX' | |
INPUT_DIR = 'input_html' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3 | |
import pandas as pd | |
import html | |
import re | |
# Убираем <br>, <br/>, </br>; заменяем любые последовательности | |
# whitespace-символов на один пробел. | |
def normalise_ws(s): | |
s = re.sub(r'</?br/?>', ' ', s) | |
s = re.sub(r'[\n\r]+ *', ' ', s) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDhQ+XNwDzwJ8OVqWIqguiebJmC3QAbsJvTAKeI+ab67BpWN2KDXlmEq/jaKR5PONnJoMXUY5Kd0gRwt+pU+mNlhycSRfHZyFjgT9SA4cwWOazHTMZ3pEFU8hZZHatAcaJB1yUKjfPy8zbQrzahE4KCVuBtyiTmbhCFKTDdsJ0e4YeRGHJ5bv+jOcAmiu4VgDdTJCI77Z4cQX32IMSn/c8GvLjQTTlzcQqNLEMGGQEuvX8V8ml1rMAIDrANAsQ3FmqjuSsZoHjI2pi16CgRm5mgDq9f3ZlbwH8NUm+9Nn2h3ytTRIJyYZUQ2zqHUiEsa1GxLK/2t24vjQmazNFsEch/wkmetG/TfGTMAXDmaNZrPCa9D13p5zbx410uBcnv9Jgg9NcleiESEFnLHFgCM25WBBGy17Lb4DLE+6judEORIqId1GLTLUosd/PBkWWh/SYU0d6Mb8lIalaUlOmZuytYTbEaigpFZBnnDcbESQSdimDNFRTqqoFjgor6YLtrrCc= dniko@localhost.localdomain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A JS version of Python's "get" method for dicts. | |
function get(dict: object, key: any, plug: any) { | |
if (dict.hasOwnProperty(key)) | |
return dict[key]; | |
else | |
return plug; | |
} | |
function convertToUnicode(input: string): string { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.express as px | |
import plotly.offline | |
template = """ | |
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="utf-8"/> | |
<script>{plotly}</script> | |
</head> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
confusion_dict_pos = {} | |
confusion_dict_paths = {} | |
# NEW STUFF # | |
addition_stats_pos = Counter() | |
addition_stats_rel = Counter() | |
# NEW STUFF # | |
strip_direction = lambda x: x.split('_')[0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
N = 5 | |
menMeans = (20, 35, 30, 35, 27) | |
womenMeans = (25, 32, 34, 20, 25) | |
menStd = (2, 3, 4, 1, 2) | |
womenStd = (3, 5, 2, 3, 3) | |
ind = np.arange(N) # the x locations for the groups | |
width = 0.35 # the width of the bars: can also be len(x) sequence |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
# test.csv: | |
# ,b,c,d | |
# p,1,2,3 | |
# q,4,5,6 | |
# r,7,8,9 |