Skip to content

Instantly share code, notes, and snippets.

View macleginn's full-sized avatar

Dmitry Nikolayev macleginn

View GitHub Profile
from pyglottolog.api import Glottolog
# 'full' is a pandas dataframe with glottocodes
# pyglottolog API object constructed from a hard-coded local path.
# NOTE(review): the path is machine-specific -- adjust before running elsewhere.
api = Glottolog('/Users/macbook/tmp/glottolog')
# NOTE(review): presumably a cache of lookup results keyed by glottocode --
# confirm against the loop that fills it.
gltc_temp = {}
# NOTE(review): presumably collects glottocodes whose lookup failed -- confirm.
gltc_err = set()
for i in range(full.shape[0]):
## Read the comma-separated file 'bdproto.csv' into a data frame.
proto.data <- read.csv('bdproto.csv',
sep = ',')
## Clean the data
## Keep only the rows in which both LanguageFamilyRoot and LanguageName are
## present, i.e. neither NA nor the empty string. The is.na() checks come
## first so that an NA value yields FALSE (row dropped) rather than an NA index.
p.d <- proto.data[ !is.na(proto.data$LanguageFamilyRoot) &
proto.data$LanguageFamilyRoot != '' &
!is.na(proto.data$LanguageName) &
proto.data$LanguageName != '', ]
## People mostly reconstruct weird stuff:
@macleginn
macleginn / preprocess.py
Created January 26, 2019 21:37
Preprocess Berezkin
import os
import os.path
import sqlite3
from bs4 import BeautifulSoup
from sys import exit
# NOTE(review): 'XXX' looks like a placeholder -- presumably it must be
# replaced with the real working directory before running; confirm.
WORKING_DIR = 'XXX'
# Name of the input directory.
# NOTE(review): presumably holds the HTML files to be parsed and is resolved
# relative to WORKING_DIR -- TODO confirm against the code that uses it.
INPUT_DIR = 'input_html'
import sqlite3
import pandas as pd
import html
import re
# Убираем <br>, <br/>, </br>; заменяем любые последовательности
# whitespace-символов на один пробел.
def normalise_ws(s):
s = re.sub(r'</?br/?>', ' ', s)
s = re.sub(r'[\n\r]+ *', ' ', s)
@macleginn
macleginn / id_rsa.pub
Created June 27, 2019 09:50
Fedora public key
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDhQ+XNwDzwJ8OVqWIqguiebJmC3QAbsJvTAKeI+ab67BpWN2KDXlmEq/jaKR5PONnJoMXUY5Kd0gRwt+pU+mNlhycSRfHZyFjgT9SA4cwWOazHTMZ3pEFU8hZZHatAcaJB1yUKjfPy8zbQrzahE4KCVuBtyiTmbhCFKTDdsJ0e4YeRGHJ5bv+jOcAmiu4VgDdTJCI77Z4cQX32IMSn/c8GvLjQTTlzcQqNLEMGGQEuvX8V8ml1rMAIDrANAsQ3FmqjuSsZoHjI2pi16CgRm5mgDq9f3ZlbwH8NUm+9Nn2h3ytTRIJyYZUQ2zqHUiEsa1GxLK/2t24vjQmazNFsEch/wkmetG/TfGTMAXDmaNZrPCa9D13p5zbx410uBcnv9Jgg9NcleiESEFnLHFgCM25WBBGy17Lb4DLE+6judEORIqId1GLTLUosd/PBkWWh/SYU0d6Mb8lIalaUlOmZuytYTbEaigpFZBnnDcbESQSdimDNFRTqqoFjgor6YLtrrCc= dniko@localhost.localdomain
/**
 * A JS version of Python's "get" method for dicts: look up `key` among the
 * own properties of `dict` and fall back to `plug` when it is absent.
 *
 * @param dict - Object used as a dictionary.
 * @param key - Property key to look up.
 * @param plug - Value returned when `dict` has no own property `key`.
 * @returns `dict[key]` if `key` is an own property of `dict`, otherwise `plug`.
 */
function get(dict: object, key: any, plug: any) {
    // Go through Object.prototype instead of calling dict.hasOwnProperty():
    // the direct call throws for objects created with Object.create(null) and
    // misbehaves when the dictionary itself stores a "hasOwnProperty" key.
    if (Object.prototype.hasOwnProperty.call(dict, key))
        return (dict as any)[key];
    return plug;
}
function convertToUnicode(input: string): string {
@macleginn
macleginn / plotly-offline-plot.py
Created October 23, 2019 08:49
An example of an offline Plotly plot created using Python with Plotly.js included once in the <head> section of the page.
import plotly.express as px
import plotly.offline
template = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<script>{plotly}</script>
</head>
# Empty containers created up front.
# NOTE(review): presumably filled in further down the script (confusion data
# per POS tag and per path, judging by the names) -- confirm.
confusion_dict_pos = {}
confusion_dict_paths = {}

# NEW STUFF #
addition_stats_pos = Counter()
addition_stats_rel = Counter()
# NEW STUFF #


def strip_direction(x):
    """Return the part of *x* that precedes its first underscore.

    A string that contains no underscore is returned unchanged.
    """
    # Only the first field is needed, so stop splitting after one separator.
    return x.split('_', 1)[0]
import numpy as np
import matplotlib.pyplot as plt
# Data for the two series, five values each.
menMeans, menStd = (20, 35, 30, 35, 27), (2, 3, 4, 1, 2)
womenMeans, womenStd = (25, 32, 34, 20, 25), (3, 5, 2, 3, 3)

N = len(menMeans)  # number of groups
ind = np.arange(N)  # x positions of the groups
width = 0.35  # bar width; a sequence of len(x) widths is also accepted
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# test.csv:
# ,b,c,d
# p,1,2,3
# q,4,5,6
# r,7,8,9