Skip to content

Instantly share code, notes, and snippets.

maxlen 15 unfolded False arity marks True binarized collinize right h=1 v=1 tailmarker markovize rank > 3 estimator dop1
python -u runexp.py 56312.42s user 296.32s system 96% cpu 16:20:51.28 total
../disco-dop/interp0.dop
labeled f-measure : 76.12293144208039
unlabeled f-measure : 79.19621749408984
../disco-dop/interp1.dop
labeled f-measure : 68.59122401847574
unlabeled f-measure : 72.74826789838338
../disco-dop/interp2.dop
labeled f-measure : 69.07723459647092
Mary is really very happy --> is Mary really very happy?
0.50 S --> NP VP [(0, 1)]
0.50 S --> VP_2 NP [(0, 1, 0)]
1.00 VP --> V VP|<ADV> [(0, 1)]
1.00 VP_2 --> V VP|<ADV> [(0,), (1,)]
0.50 VP|<ADV> --> ADV VP|<ADV> [(0, 1)]
0.50 VP|<ADV> --> ADV ADJ [(0, 1)]
0.50 ADJ --> Epsilon ['happy']
0.50 ADJ --> Epsilon ['sad']
0.50 ADV --> Epsilon ['really']
@andreasvc
andreasvc / array_bench.pyx
Created November 17, 2012 14:45
Benchmark array creation
import time
import numpy as np
cimport numpy as np
from libc.stdlib cimport malloc, free
from cpython.array cimport array, clone
cdef long N = 1000000
cdef double* ptr
cdef array ar, template = array('d')
@andreasvc
andreasvc / t.lex
Created November 18, 2012 23:33
TSG transforms
The D 1 D@1-1 1 D@0-1 1
dog N 0.5 N@0-2 1
cat N@1-2 1 N 0.5
barks V 0.5 V@0-4 1
meows V 0.5 V@1-4 1
loudly RB 1 RB@0-5 1 RB@1-5 1
@andreasvc
andreasvc / draaideur.py
Created January 10, 2013 22:27
Given a dictionary, find words for which all rotations occur in the dictionary.
import sys, collections
def rotations(a):
    """Return the set of proper rotations of the string ``a``.

    A rotation moves the first ``x`` items of ``a`` to the end, for every
    ``x`` from 1 up to ``len(a) - 1``.  The unrotated input is therefore only
    part of the result when some rotation happens to reproduce it
    (e.g. ``'aa'``).  Inputs of length 0 or 1 have no proper rotations and
    yield an empty set.

    >>> sorted(rotations('abc'))
    ['bca', 'cab']
    """
    return {a[x:] + a[:x] for x in range(1, len(a))}
# Bucket the dictionary words by length: a rotation never changes the length
# of a word, so candidates only need to be looked up among equally long words.
lexicon = collections.defaultdict(set)
# The word list is read from the file named by the first command-line
# argument, one word per line; the context manager closes the file afterwards.
with open(sys.argv[1]) as wordfile:
    for a in wordfile:
        word = a.strip()
        # Key on the length of the stripped word itself rather than on
        # len(line) - 1, which is off by one for a final line without a
        # newline, for CRLF line endings, and for lines with extra whitespace.
        lexicon[len(word)].add(word)
for length in sorted(lexicon):
if length == 1:
continue
@andreasvc
andreasvc / pixel sorting.ipynb
Created February 17, 2013 17:15
Convert image to 1-dimensional sequence of RGB pixels, sort, and convert back.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@andreasvc
andreasvc / bug.pyx
Last active December 16, 2015 11:18
cdef extern from "macros.h":
# test whether the b'th bit of array a is set:
unsigned long TESTBIT(unsigned long a[], int b)
cdef unsigned long foo[2]
foo[0] = 281474976710656UL
foo[1] = 0
# what the macro does:
print foo[0] & (1UL << 48)
@andreasvc
andreasvc / cartpi.py
Last active March 29, 2017 02:33
Get the cartesian product of an arbitrary number of iterables, including infinite sequences.
def cartpi(seq):
""" A depth-first cartesian product for a sequence of iterables;
i.e., all values of the last iterable are consumed before advancing the
preceding ones. Like itertools.product(), but supports infinite sequences.
>>> from itertools import islice, count
>>> list(islice(cartpi([count(), count()]), 9))
[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8)]
"""
if seq:
@andreasvc
andreasvc / sentnums.py
Last active December 20, 2015 08:49
Match lines in one file with those of another, and produce line numbers.
""" Match lines in one file with those of another,
and produce line numbers. """
import io
import sys
USAGE = """Match lines in one file with those of another, and get line numbers.
usage: python %s sents text output
where sents and text are files with one sentence per line.
The result will be of the form "1|line", written to file "output".
Everything is assumed to be encoded with UTF-8.""" % sys.argv[0]
@andreasvc
andreasvc / logprobs.py
Last active December 20, 2015 22:28
Compare different strategies for adding a large number of small log probabilities.
""" Compare different strategies for adding a large number of small log
probabilities. """
from __future__ import print_function
from math import log, exp, fsum, isinf
from random import expovariate
N = 10000
def logprobadd(x, y):