Skip to content

Instantly share code, notes, and snippets.

@xissy
Created August 28, 2013 22:55
Show Gist options
  • Save xissy/6372428 to your computer and use it in GitHub Desktop.
Save xissy/6372428 to your computer and use it in GitHub Desktop.
Parse a Stanford CoreNLP bracketed parse-tree string into nested objects.
# Sample bracketed parse tree used as input for the parser below. It is kept
# as one logical string; the pieces are joined with single spaces, so the
# resulting value is a single-line tree.
treeString = [
  '(ROOT (S (NP (PRP They))'
  '(VP (VP (VBD were) (VP (VBN cost) (S (ADJP (JJ effective)))))'
  '(, ,)'
  '(VP (VBD held) (PRT (RP up))'
  '(NP (NP (JJ great)) (, ,) (NP (DT no) (NNS leaks)) (, ,) (NP (DT no) (NNS dyes)) (CC or) (NP (JJ harsh) (NNS chemicals))))'
  '(, ,) (CC and)'
  '(VP (VBD had) (NP (NP (DT a) (JJ simple) (NN design)) (PP (IN on) (NP (DT the) (NN outside))))))'
  '(. .)))'
].join ' '

# Accumulator that receives the parsed tree nodes.
parsedList = []

# Running 1-based counter used to number leaf words in the order they are met.
wordIndex = 1
# Parse one parenthesised subtree of a bracketed parse tree, e.g.
# "(NP (DT the) (NN cat))", and append the resulting node to `currentPosition`.
#
# treeString      - the subtree text, including its outer parentheses. Spaces,
#                   tabs and line breaks between tokens are all accepted, so
#                   both single-line and multi-line/indented trees work.
# currentPosition - array that receives the node for this subtree.
#
# Every subtree becomes { type: <label>, children: [...] }. A subtree without
# nested parentheses (a leaf such as "(DT the)") additionally gets a single
# child { type: <label>, word: <token>, id: <n> }, where <n> is taken from the
# outer `wordIndex` counter, which is then incremented.
parseRecursive = (treeString, currentPosition) ->
  # Collapse runs of ASCII whitespace to one space and trim the ends, so that
  # stripping the outer parentheses and splitting on ' ' below are reliable.
  # An explicit character class is used instead of \s because \s in JavaScript
  # also matches U+00A0; a non-breaking space inside a token must not be
  # treated as a separator.
  normalized = treeString.replace(/[ \t\r\n]+/g, ' ').replace(/^ | $/g, '')

  # Drop the enclosing "(" and ")" and any padding just inside them.
  inner = normalized.slice(1, -1).replace(/^ | $/g, '')
  tokens = inner.split(' ')
  label = tokens[0]
  rest = tokens.slice(1).join(' ')

  children = []
  currentPosition.push
    type: label
    children: children

  depth = 0
  pending = ''
  sawBracket = false
  for ch in rest
    pending += ch
    if ch is '('
      depth++
      sawBracket = true
    else if ch is ')'
      depth--
      # Back at depth 0: `pending` now holds one complete child subtree.
      if depth is 0
        parseRecursive pending.replace(/^ +| +$/g, ''), children
        pending = ''

  # No nested parentheses: this is a leaf, so record the word itself.
  if not sawBracket
    children.push
      type: label
      word: tokens[1]
      id: wordIndex
    wordIndex++
# Parse the sample tree into `parsedList`, then print the result as JSON.
parseRecursive(treeString, parsedList)
console.log(JSON.stringify(parsedList))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment