View bradley_terry.py
#!/usr/bin/env python3 | |
__author__ = 'Dmitry Ustalov' | |
__copyright__ = 'Copyright 2021 Dmitry Ustalov' | |
__license__ = 'MIT' # https://opensource.org/licenses/MIT | |
import numpy as np | |
EPS = 1e-8 |
View dirbackup
#!/bin/sh | |
CWD=$(basename "$PWD") | |
XZ_OPT="-T 0" tar --exclude '*~' -C ../ -cJvf "../$CWD.tar.xz" "$CWD" |
View Makefile
WATSET ?= ../watset-java/target/watset.jar | |
LCC ?= ../lcc | |
export LANG:=en_US.UTF-8 | |
export LC_COLLATE:=C | |
export CLASSPATH := $(WATSET) | |
nodes: | |
cut -f1,2 $(LCC)/eng_news_2016_10K/eng_news_2016_10K-co_s.txt | sed -re 's/\t/\n/g' | sort -u | wc -l | |
cut -f1,2 $(LCC)/eng_news_2016_30K/eng_news_2016_30K-co_s.txt | sed -re 's/\t/\n/g' | sort -u | wc -l |
View sigf.py
#!/usr/bin/env python3 | |
__author__ = 'Dmitry Ustalov' | |
__credits__ = 'Sebastian Padó' | |
__license__ = 'MIT' | |
# This is an MIT-licensed implementation of the sigf toolkit for randomization tests: | |
# https://nlpado.de/~sebastian/software/sigf.shtml | |
import random |
View collocation.groovy
#!/usr/bin/env groovy | |
import org.apache.commons.math3.stat.descriptive.moment.Mean | |
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation | |
import org.jgrapht.graph.SimpleWeightedGraph | |
import org.jgrapht.util.SupplierUtil | |
import org.nlpub.watset.graph.ChineseWhispers | |
import org.nlpub.watset.graph.NodeWeighting | |
import org.nlpub.watset.graph.MaxMax | |
import org.nlpub.watset.eval.Measurer | |
import org.nlpub.watset.graph.Watset |
View Makefile
LC_COLLATE = C | |
SEED = 1337 | |
WCL_WRAPPER = /srv/definitions/wcl-extract | |
measure: | |
./measure.py | |
kfold: wiki_really_all.txt | |
./kfold.py --seed=$(SEED) $< |
View nmpu.py
#!/usr/bin/env python | |
# This script computes the normalized modified purity and inverse purity | |
# as described in this paper: https://aclweb.org/anthology/P14-1097. | |
# In fact, this program is currently quite a rough translation of | |
# the evaluation-verb-classes.perl script provided by Daisuke Kawahara. | |
import argparse | |
import re | |
import sys |
View ztest.awk
#!/usr/bin/awk -f | |
BEGIN { | |
# significance level | |
if (length(ALPHA) == 0) ALPHA = 0.05; | |
# standard error estimation method: "basic" or "pooled" | |
if (length(SE) == 0) SE = "basic"; | |
# one-tailed or two-tailed? | |
if (TAILS != 2) TAILS = 1; |
View extract-relations.groovy
#!/usr/bin/env groovy | |
import de.tudarmstadt.ukp.jwktl.JWKTL | |
import de.tudarmstadt.ukp.jwktl.api.filter.WiktionaryEntryFilter | |
import de.tudarmstadt.ukp.jwktl.api.util.Language | |
final languages = [en: Language.ENGLISH, ru: Language.RUSSIAN, de: Language.GERMAN] | |
if (args.length != 2 || !languages.containsKey(args[1] = args[1].toLowerCase())) { | |
throw new IllegalArgumentException('Required arguments: <PARSED-WIKTIONARY> en|ru|de') | |
} |
View decoder.sh
#!/bin/bash -e | |
S=$(head -1) | |
CHARSETS=(utf8 cp1251 cp1252 koi8r koi8u iso-8859-5 maccyrillic) | |
for c1 in ${CHARSETS[*]}; do | |
for c2 in ${CHARSETS[*]}; do | |
for c3 in ${CHARSETS[*]}; do | |
for c4 in ${CHARSETS[*]}; do | |
echo -ne "$c1\t$c2\t$c3\t$c4\t" | |
<<<$S iconv -f=$c1 -t=$c2 -c | iconv -f=$c3 -t=$c4 -c | |
done |
NewerOlder