Skip to content

Instantly share code, notes, and snippets.

@fergusq
Last active February 18, 2019 21:27
Show Gist options
  • Save fergusq/b259a5fa609d9b74c89692e5ee96a018 to your computer and use it in GitHub Desktop.
Save fergusq/b259a5fa609d9b74c89692e5ee96a018 to your computer and use it in GitHub Desktop.
{
conllu := require("conllu")
}
/* Entry point.
   Reads the treebank given as argument with conllu.parseSentences, feeds every
   parsed sentence to addTree (which fills the global CHILD_TREES map), prints
   how long that took, lists the last 20 sorted [count, key] pairs of the
   "root:" keys and then starts a prompt loop: for every word pulled from the
   input stream it prints the child trees recorded for that word as a root and
   one randomly generated phrase.
   NOTE(review): the elapsed time is divided by 1000 and labelled " s", so
   currentTime() is presumably in milliseconds - confirm. */
main treebank {
	time := currentTime()
	conllu.parseSentences treebank | addTree _
	time = currentTime()-time
	print time/1000, " s"
	/* Overview of root words: [number of child trees, key] pairs, sorted, last 20. */
	keys CHILD_TREES | { [ [#CHILD_TREES[word], word] ] for word if [ word =~ "root:.*" ] } | sort | tail 20 | print _
	push "> "
	for word do
		try do
			/* Keys in CHILD_TREES are built with lowerCase(word) in addTree and in
			   Child.randomChildTree, so the lookup must be lower-cased as well;
			   otherwise capitalised input is silently ignored. */
			key := "root:"..lowerCase(word)
			if [ CHILD_TREES[key]? ] do
				trees := CHILD_TREES[key]
				print(#trees, ": ", [trees() | _.asString]&", ")
				phrase := [(new Child(word, [], [], "root")).generateRandomPhrase()]
				/* First the raw output including the bracket markers ... */
				print(phrase&" ")
				/* ... then only the items that contain neither "[" nor "]". */
				print([push(x) for x in phrase if [ not ("[" in x or "]" in x) ]]&" ")
			done
		catch e
			errprint e.message
		done
		push "> "
	done
}
/* Records the dependents of `tree` (and, recursively, of all of its
   descendants) in the global CHILD_TREES map.
   The key is "<relation>:<lower-cased word>". The key is always created, but
   nodes whose own relation is "case" or "mark" get no child tree stored.
   Dependents with a relation in the skip list are left out; the remaining ones
   are split into left and right children by comparing their num with the
   head's num. A node without remaining dependents contributes the shared
   EMPTY_CHILD_TREE, at most once per key. */
addTree tree {
	/* Descendants first, so their keys exist before this node is handled. */
	for sub in tree.children_list do
		addTree sub
	done
	key := tree.relation..":"..lowerCase(tree.word)
	tryCreate CHILD_TREES, key, []
	return if [ tree.relation in ["case", "mark"] ]
	skipped := ["conj", "cc", "ccomp", "case", "mark", "mwe", "remnant", "punct"]
	node := new ChildTree
	for dependent in tree.children_list do
		continue if [ dependent.relation in skipped ]
		if [ dependent.num >= tree.num ] do
			node.right += toChild(dependent)
		else
			node.left += toChild(dependent)
		done
	done
	if [ #node.left+#node.right = 0 ] do
		/* No usable dependents: store the shared empty tree only once. */
		unless [ EMPTY_CHILD_TREE in CHILD_TREES[key] ] do
			CHILD_TREES[key] += EMPTY_CHILD_TREE
		done
	else
		CHILD_TREES[key] += node
	done
}
/* Stores d under key n of l, but only when l has no entry for n yet;
   an existing entry is left untouched. */
tryCreate l, n, d {
	unless [ l[n]? ] do
		l[n] = d
	done
}
/* Converts a dependency node into a Child record.
   The words of the node's "case" and "mark" dependents are collected into two
   lists: those whose num is smaller than the node's num go before the word,
   all others after it. The Child keeps the node's own word and relation. */
toChild tree {
	before := []
	after := []
	for relation in ["case", "mark"] do
		/* Nothing to collect when the node has no dependents of this relation. */
		continue unless [ tree.children[relation]? ]
		for marker in tree.children[relation] do
			if [ marker.num >= tree.num ] do
				after += marker.word
			else
				before += marker.word
			done
		done
	done
	return new Child(tree.word, before, after, tree.relation)
}
/* Global state used by the functions in this file.
   NOTE(review): presumably executed once after the file has been loaded - confirm. */
post_load: {
/* Map from "<relation>:<lower-cased word>" keys (built in addTree) to lists of ChildTree records. */
CHILD_TREES := new map
/* Single shared ChildTree without children; addTree stores it for nodes that have no usable dependents. */
EMPTY_CHILD_TREE := new ChildTree
}
/* One observed arrangement of dependents around a head word:
   `left` holds the Child records placed before the head, `right` those placed
   after it (addTree decides the side by comparing num values). */
record ChildTree {
left : list = []
right : list = []
/* Calls generateRandomPhrase on every left child with depth d and left=TRUE. */
function generateLeftChildren d {
self.left | _.generateRandomPhrase d=d, left=TRUE
}
/* Calls generateRandomPhrase on every right child with depth d and left=FALSE. */
function generateRightChildren d {
self.right | _.generateRandomPhrase d=d, left=FALSE
}
/* Textual form of the tree: each left child's asString followed by " ",
   then "~" standing for the head, then " " and each right child's asString.
   NOTE(review): concat presumably joins the pushed pieces into one string - confirm. */
function asString {
{
self.left | push _.asString(), " "
push "~"
self.right | push " ", _.asString()
} | concat
}
}
/* A dependent word together with the words attached before (prefix) and
   after (suffix) it and its dependency relation. toChild fills prefix and
   suffix from the "case" and "mark" dependents of the node. */
record Child(word, prefix, suffix, relation) {
word : string = word
prefix : list = prefix
suffix : list = suffix
relation : string = relation
/* Prints a randomly generated phrase headed by this word.
   d is the nesting depth: it repeats the " " indentation and is passed on as
   d+1 to the children. left selects the marker printed before the relation:
   "/" when TRUE, "\" otherwise.
   If randomChildTree yields a tree, its left children are generated before
   the word and its right children after it; otherwise only the word itself
   (with prefix and suffix lines) is printed.
   NOTE(review): prefix and suffix are used here without self. - presumably
   the constructor parameters are in scope; confirm. */
function generateRandomPhrase d=0, left=FALSE {
#!return if [ d >= 3 ]
/* The line above is a disabled depth limit; recursion depth is currently unbounded. */
self.randomChildTree d | if tryPull child_tree do
print " "*d, "/[", prefix&" " if [ #prefix > 0 ]
child_tree.generateLeftChildren d+1
print " "*d, (push(`/`) if [ left ] else push(`\`))..self.relation..":[", self.word
child_tree.generateRightChildren d+1
print " "*d, "\\]", suffix&" " if [ #suffix > 0 ]
else
print " "*d, "/[", prefix&" " if [ #prefix > 0 ]
print " "*d, (push(`/`) if [ left ] else push(`\`))..self.relation..":[", self.word
print " "*d, "\\]", suffix&" " if [ #suffix > 0 ]
done
}
/* Picks one of the child trees recorded in CHILD_TREES for this relation and
   lower-cased word via select. Returns without a value when the recorded list
   is empty. The parameter d is not used in the body. */
function randomChildTree d {
alternatives := CHILD_TREES[self.relation..":"..lowerCase(self.word)]
return unless [ #alternatives > 0 ]
select alternatives
}
/* Builds a string from the word, preceded by the prefix words in parentheses
   and followed by the suffix words in parentheses when those lists are not empty. */
function asString {
ans := ""
ans .= "("..self.prefix&" "..") " if [ #self.prefix > 0 ]
ans .= self.word
ans .= " ("..self.suffix&" "..")" if [ #self.suffix > 0 ]
return ans
}
}
/* Returns a random integer in the inclusive range a..b.
   The remainder is normalised into 0..span-1 before a is added.
   NOTE(review): this assumes randomInteger() may return negative values and
   that % keeps the sign of the dividend, in which case the plain remainder
   could yield results below a - confirm. For non-negative values the result
   is the same as before. */
rndn(a, b) {
	span := b-a+1
	return (randomInteger()%span+span)%span+a
}
/* Invokes f for the values produced by `seq 1, n`.
   NOTE(review): presumably this calls f n times without arguments - confirm.
   No call to this helper is visible in this file. */
times(n, f) {
seq 1, n | f for _
}
/* Returns one element of alternatives, chosen with rndn over the index
   range 0..#alternatives-1. */
select(alternatives) {
	index := rndn(0, #alternatives-1)
	return alternatives[index]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment