Skip to content

Instantly share code, notes, and snippets.

Dmitry Ustalov dustalov

Block or report user

Report or block dustalov

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@dustalov
dustalov / Makefile
Last active Aug 31, 2019
Chinese Whispers and Telephone Game Performance Evaluation
View Makefile
WATSET ?= ../watset-java/target/watset.jar
LCC ?= ../lcc
export LANG:=en_US.UTF-8
export LC_COLLATE:=C
export CLASSPATH := $(WATSET)
nodes:
cut -f1,2 $(LCC)/eng_news_2016_10K/eng_news_2016_10K-co_s.txt | sed -re 's/\t/\n/g' | sort -u | wc -l
cut -f1,2 $(LCC)/eng_news_2016_30K/eng_news_2016_30K-co_s.txt | sed -re 's/\t/\n/g' | sort -u | wc -l
@dustalov
dustalov / sigf.py
Last active Nov 25, 2018
An implementation of the sigf toolkit for randomization tests in Python 3
View sigf.py
#!/usr/bin/env python
# This is an MIT-licensed implementation of the sigf toolkit
# for randomization tests: https://nlpado.de/~sebastian/software/sigf.shtml
from random import getrandbits
import sys
def randomized_test(model1, model2, score, trials):
print('# score(model1) = %f' % score(model1), file=sys.stderr)
@dustalov
dustalov / collocation.groovy
Last active Jun 23, 2019
Watset (Java) Performance Measurement
View collocation.groovy
#!/usr/bin/env groovy
import org.apache.commons.math3.stat.descriptive.moment.Mean
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation
import org.jgrapht.graph.SimpleWeightedGraph
import org.jgrapht.util.SupplierUtil
import org.nlpub.watset.graph.ChineseWhispers
import org.nlpub.watset.graph.NodeWeighting
import org.nlpub.watset.graph.MaxMax
import org.nlpub.watset.eval.Measurer
import org.nlpub.watset.graph.Watset
@dustalov
dustalov / flatten.awk
Last active Jan 30, 2018
Flatten the results of Stanford's Neural Parser.
View flatten.awk
#!/usr/bin/awk -f
BEGIN {
RS = "\n\n";
FS = "\n";
OFS = ",";
ORS = "";
}
{
@dustalov
dustalov / Makefile
Last active Jan 11, 2018
Extracting and cross-validating version 1.0 of the WCL dataset
View Makefile
LC_COLLATE = C
SEED = 1337
WCL_WRAPPER = /srv/definitions/wcl-extract
measure:
./measure.py
kfold: wiki_really_all.txt
./kfold.py --seed=$(SEED) $<
@dustalov
dustalov / nmpu.py
Last active Jan 2, 2018
Normalized Modified Purity in Python.
View nmpu.py
#!/usr/bin/env python
# This script computes the normalized modified purity and inverse purity
# as described in this paper: https://aclweb.org/anthology/P14-1097.
# In fact, this program is currently quite a rough translation of
# the evaluation-verb-classes.perl script provided by Daisuke Kawahara.
import argparse
import re
import sys
@dustalov
dustalov / agreement.groovy
Last active Aug 28, 2017
Calculate inter-annotator agreement using DKPro Agreement in Groovy.
View agreement.groovy
#!/usr/bin/env groovy
import java.util.Locale
Locale.setDefault(Locale.ROOT)
@Grab('org.apache.commons:commons-csv:1.4')
import org.apache.commons.csv.CSVParser
import static org.apache.commons.csv.CSVFormat.EXCEL
@Grab('org.dkpro.statistics:dkpro-statistics-agreement:2.1.0')
@dustalov
dustalov / ztest.awk
Last active Feb 8, 2017
Pairwise statistical significance test in AWK using Z-test.
View ztest.awk
#!/usr/bin/awk -f
BEGIN {
# significance level
if (length(ALPHA) == 0) ALPHA = 0.05;
# standard error estimation method: "basic" or "pooled"
if (length(SE) == 0) SE = "basic";
# one-tailed or two-tailed?
if (TAILS != 2) TAILS = 1;
@dustalov
dustalov / extract-relations.groovy
Last active Jun 5, 2018
Extract semantic relations from Wiktionary using JWKTL.
View extract-relations.groovy
#!/usr/bin/env groovy
import de.tudarmstadt.ukp.jwktl.JWKTL
import de.tudarmstadt.ukp.jwktl.api.filter.WiktionaryEntryFilter
import de.tudarmstadt.ukp.jwktl.api.util.Language
final languages = [en: Language.ENGLISH, ru: Language.RUSSIAN, de: Language.GERMAN]
if (args.length != 2 || !languages.containsKey(args[1] = args[1].toLowerCase())) {
throw new IllegalArgumentException('Required arguments: <PARSED-WIKTIONARY> en|ru|de')
}
@dustalov
dustalov / decoder.sh
Created Sep 13, 2016
A brute-force decoder for Cyrillic strings with an unknown combination of charsets.
View decoder.sh
#!/bin/bash -e
S=$(head -1)
CHARSETS=(utf8 cp1251 cp1252 koi8r koi8u iso-8859-5 maccyrillic)
for c1 in ${CHARSETS[*]}; do
for c2 in ${CHARSETS[*]}; do
for c3 in ${CHARSETS[*]}; do
for c4 in ${CHARSETS[*]}; do
echo -ne "$c1\t$c2\t$c3\t$c4\t"
<<<$S iconv -f=$c1 -t=$c2 -c | iconv -f=$c3 -t=$c4 -c
done
You can’t perform that action at this time.