Last active
February 18, 2019 21:27
-
-
Save fergusq/b259a5fa609d9b74c89692e5ee96a018 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
conllu := require("conllu") | |
} | |
/* Entry point: learns child-tree statistics from a treebank, then answers
   words pulled from the input stream with randomly generated phrases.

   treebank is passed unchanged to conllu.parseSentences; every value that
   call produces is handed to addTree, which fills the global CHILD_TREES
   map. The elapsed currentTime() difference divided by 1000 is printed
   followed by " s" (presumably currentTime is in milliseconds; confirm).

   After loading, the [count, key] pairs of all keys matching "root:.*" are
   sorted and the last 20 of them are printed. Then, for every word pulled
   from the input, the known child trees of "root:<word>" are listed and one
   random phrase is printed twice: once in full, and once with every item
   containing "[" or "]" removed. Errors raised while handling a word are
   reported with errprint and the prompt loop continues. */
main treebank {
    time := currentTime()
    conllu.parseSentences treebank | addTree _
    time = currentTime()-time
    print time/1000, " s"
    keys CHILD_TREES | { [ [#CHILD_TREES[word], word] ] for word if [ word =~ "root:.*" ] } | sort | tail 20 | print _
    push "> "
    for word do
        try do
            /* addTree stores its keys as relation..":"..lowerCase(word) and
               Child.randomChildTree looks them up the same way, so the
               existence check must lowercase the typed word as well.
               Otherwise a capitalized word is silently ignored. */
            key := "root:"..lowerCase(word)
            if [ CHILD_TREES[key]? ] do
                trees := CHILD_TREES[key]
                print(#trees, ": ", [trees() | _.asString]&", ")
                phrase := [(new Child(word, [], [], "root")).generateRandomPhrase()]
                print(phrase&" ")
                /* Second rendering: drop the bracket marker items. */
                print([push(x) for x in phrase if [ not ("[" in x or "]" in x) ]]&" ")
            done
        catch e
            errprint e.message
        done
        push "> "
    done
}
/* Records the dependents of `tree` (and, recursively, of every node below
   it) in the global CHILD_TREES map.

   The map key is relation..":"..lowerCase(word). An entry (initially an
   empty list) is created for every node. Nodes whose relation is "case" or
   "mark" get no child tree of their own. For other nodes, each dependent
   whose relation is not in the ignored list is converted with toChild and
   placed on the left or right side depending on whether its `num` is
   smaller than the head's `num`. A node with no remaining dependents
   contributes the shared EMPTY_CHILD_TREE, at most once per key. */
addTree tree {
    /* Handle the whole subtree first. */
    for sub in tree.children_list do
        addTree sub
    done

    key := tree.relation..":"..lowerCase(tree.word)
    tryCreate CHILD_TREES, key, []
    return if [ tree.relation in ["case", "mark"] ]

    ignored := ["conj", "cc", "ccomp", "case", "mark", "mwe", "remnant", "punct"]
    entry := new ChildTree
    for dep in tree.children_list do
        unless [ dep.relation in ignored ] do
            if [ dep.num < tree.num ] do
                entry.left += toChild(dep)
            else
                entry.right += toChild(dep)
            done
        done
    done

    if [ #entry.left+#entry.right = 0 ] do
        CHILD_TREES[key] += EMPTY_CHILD_TREE unless [ EMPTY_CHILD_TREE in CHILD_TREES[key] ]
    else
        CHILD_TREES[key] += entry
    done
}
/* Stores d under key n of the collection l, but only when l has no value
   at n yet; an existing value is left untouched. */
tryCreate l, n, d {
    unless [ l[n]? ] do
        l[n] = d
    done
}
/* Builds a Child record for the node `tree`.

   The words of the node's "case" and "mark" dependents (when
   tree.children has such entries) are collected into two lists: those
   whose `num` is smaller than the node's `num` go before the word, the
   rest go after it. The result carries the node's word and relation. */
toChild tree {
    before := []
    after := []
    for kind in ["case", "mark"] do
        continue unless [ tree.children[kind]? ]
        for marker in tree.children[kind] do
            if [ marker.num < tree.num ] do
                before += marker.word
            else
                after += marker.word
            done
        done
    done
    return new Child(tree.word, before, after, tree.relation)
}
/* Global state shared by addTree, main and Child.randomChildTree. */
post_load: {
    /* Single shared instance that addTree stores for nodes without any
       recorded dependents. */
    EMPTY_CHILD_TREE := new ChildTree
    /* Maps relation..":"..lowercased word to the list of ChildTree
       alternatives collected for it. */
    CHILD_TREES := new map
}
/* One recorded arrangement of dependents around a head word: the Child
   records placed on its left side and those placed on its right side. */
record ChildTree {
    left : list = []
    right : list = []

    /* Generates a random phrase for every left-side child at depth d. */
    function generateLeftChildren d {
        for dependent in self.left do
            dependent.generateRandomPhrase d=d, left=TRUE
        done
    }

    /* Generates a random phrase for every right-side child at depth d. */
    function generateRightChildren d {
        for dependent in self.right do
            dependent.generateRandomPhrase d=d, left=FALSE
        done
    }

    /* Renders the tree on one line: each left child followed by a space,
       then "~" for the head position, then each right child preceded by a
       space. */
    function asString {
        text := ""
        for dependent in self.left do
            text .= dependent.asString().." "
        done
        text .= "~"
        for dependent in self.right do
            text .= " "..dependent.asString()
        done
        return text
    }
}
/* A word in a dependency role, together with the function words printed
   before it (prefix) and after it (suffix). */
record Child(word, prefix, suffix, relation) {
    word : string = word
    prefix : list = prefix
    suffix : list = suffix
    relation : string = relation

    /* Prints a randomly generated phrase headed by this word.

       d is the nesting depth and sets the width of the leading
       indentation; left selects the "/" marker instead of "\" in front of
       the relation name. When randomChildTree yields a tree, its left
       children are generated before the word line and its right children
       after it, both at depth d+1. The prefix and suffix lines are printed
       only when the respective list is non-empty. */
    function generateRandomPhrase d=0, left=FALSE {
        /* Disabled depth limit, kept from the original source: */
        #!return if [ d >= 3 ]
        pad := " "*d
        slash := `\`
        slash = `/` if [ left ]
        opener := slash..self.relation..":["
        self.randomChildTree d | if tryPull child_tree do
            print pad, "/[", prefix&" " if [ #prefix > 0 ]
            child_tree.generateLeftChildren d+1
            print pad, opener, self.word
            child_tree.generateRightChildren d+1
            print pad, "\\]", suffix&" " if [ #suffix > 0 ]
        else
            /* No child tree available: print the word on its own. */
            print pad, "/[", prefix&" " if [ #prefix > 0 ]
            print pad, opener, self.word
            print pad, "\\]", suffix&" " if [ #suffix > 0 ]
        done
    }

    /* Yields one randomly selected ChildTree recorded in CHILD_TREES for
       this relation and lowercased word, or nothing when the recorded list
       is empty. The parameter d is not used. */
    function randomChildTree d {
        key := self.relation..":"..lowerCase(self.word)
        candidates := CHILD_TREES[key]
        if [ #candidates > 0 ] do
            select candidates
        done
    }

    /* Renders the child as "(prefix words) word (suffix words)", leaving
       out a parenthesized part when its list is empty. */
    function asString {
        before := ""
        before = "("..self.prefix&" "..") " if [ #self.prefix > 0 ]
        after := ""
        after = " ("..self.suffix&" "..")" if [ #self.suffix > 0 ]
        return before..self.word..after
    }
}
/* Returns a random integer between a and b, both inclusive.

   The remainder of randomInteger() is normalized before the offset is
   added: if randomInteger() yields a negative value, the remainder is
   negative as well and the unnormalized result would fall below a.
   NOTE(review): assumes randomInteger may return negative numbers and
   that % keeps the sign of the dividend; confirm against the runtime.
   The caller must pass a <= b. */
rndn(a, b) {
    span := b-a+1
    offset := randomInteger()%span
    offset = offset+span if [ offset < 0 ]
    return offset+a
}
/* Calls f once for every value produced by seq 1, n. The values
   themselves are discarded. */
times(n, f) {
    seq 1, n | for _ do
        f
    done
}
/* Returns one element of alternatives, chosen at a random index obtained
   from rndn between 0 and the last index. Callers are expected to pass a
   non-empty list. */
select(alternatives) {
    top := #alternatives-1
    pick := rndn(0, top)
    return alternatives[pick]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment