Skip to content

Instantly share code, notes, and snippets.

@toVersus
Created April 14, 2018 13:58
Show Gist options
  • Save toVersus/ed36c4f84918948b2b3d72ce0097b694 to your computer and use it in GitHub Desktop.
Save toVersus/ed36c4f84918948b2b3d72ce0097b694 to your computer and use it in GitHub Desktop.
[Language Processing 100 Essentials] #59: Parse S-expression
package main
import (
"encoding/xml"
"flag"
"fmt"
"os"
"strings"
)
// Root is the top-level value that readXML decodes the whole XML file into.
type Root struct {
// Document is filled from the <document> child element; it remains nil
// when the decoded XML has no such element.
Document *Document `xml:"document"`
}
// Document groups the sentences and the coreference entries found under the
// <document> element.
type Document struct {
// Sentences collects every <sentence> element nested inside <sentences>.
Sentences Sentences `xml:"sentences>sentence"`
// Coreferences collects every <coreference> element nested inside an
// outer <coreference> element.
Coreferences Coreferences `xml:"coreference>coreference"`
}
// Sentences is a slice of Sentence pointers; main indexes it to pick the
// sentence to parse.
type Sentences []*Sentence
// Coreferences is a slice of Coreference pointers.
type Coreferences []*Coreference
// Sentence mirrors one <sentence> element.
type Sentence struct {
// ID is read from the "id" attribute.
ID int `xml:"id,attr"`
// Dependencies holds one entry per <dependencies> child element.
Dependencies Dependencies `xml:"dependencies"`
// Parse is the text of the <parse> element; main hands it to parse() as
// an S-expression.
Parse string `xml:"parse,omitempty"`
// Tokens collects every <token> nested inside <tokens>.
Tokens Tokes `xml:"tokens>token,omitempty"`
}
// Dependencies is a slice of Dependencie pointers.
type Dependencies []*Dependencie
// Tokes is a slice of Token pointers (the name is spelled this way
// throughout the file).
type Tokes []*Token
// Token mirrors one <token> element. Apart from ID, which comes from an
// attribute, every field is read from the child element named in its tag.
type Token struct {
// ID is read from the "id" attribute and kept as a string.
ID string `xml:"id,attr"`
Word string `xml:"word,omitempty"`
Lemma string `xml:"lemma,omitempty"`
// NOTE(review): the two offsets presumably delimit the token within the
// source text; confirm against the producer of the XML.
CharacterOffsetBegin int `xml:"CharacterOffsetBegin,omitempty"`
CharacterOffsetEnd int `xml:"CharacterOffsetEnd,omitempty"`
// NOTE(review): POS and NER are presumably the part-of-speech and
// named-entity tags; confirm against the producer of the XML.
POS string `xml:"POS,omitempty"`
NER string `xml:"NER,omitempty"`
NormalizedNER string `xml:"NormalizedNER,omitempty"`
Speaker string `xml:"Speaker,omitempty"`
// Timex stays nil when the token has no <Timex> child element.
Timex *Timex `xml:"Timex,omitempty"`
}
// Timex mirrors a <Timex> element attached to a token.
type Timex struct {
// Tid is read from the "tid" attribute.
Tid string `xml:"tid,attr"`
// Type is read from the "type" attribute.
Type string `xml:"type,attr"`
// Value is the character data of the element.
Value string `xml:",chardata"`
}
// Governor mirrors the <governor> element of a dependency (see Dep).
type Governor struct {
// Copy is read from the "copy" attribute.
Copy string `xml:"copy,attr"`
// Idx is read from the "idx" attribute.
Idx int `xml:"idx,attr"`
// Value is the character data of the element.
Value string `xml:",chardata"`
}
// Dependent mirrors the <dependent> element of a dependency (see Dep).
type Dependent struct {
// Copy is read from the "copy" attribute.
Copy string `xml:"copy,attr"`
// Idx is read from the "idx" attribute.
Idx int `xml:"idx,attr"`
// Value is the character data of the element.
Value string `xml:",chardata"`
}
// Dep mirrors one <dep> element: a single relation between a governor and a
// dependent.
type Dep struct {
// Extra is read from the "extra" attribute.
Extra string `xml:"extra,attr"`
// Type is read from the "type" attribute.
Type string `xml:"type,attr"`
// Dependent and Governor stay nil when the matching child element is
// missing.
Dependent *Dependent `xml:"dependent,omitempty"`
Governor *Governor `xml:"governor,omitempty"`
}
// Dependencie mirrors one <dependencies> element of a sentence.
type Dependencie struct {
// Type is read from the "type" attribute.
Type string `xml:"type,attr"`
// Deps collects every <dep> child element.
Deps Deps `xml:"dep,omitempty"`
}
// Deps is a slice of Dep pointers.
type Deps []*Dep
// Coreference mirrors one inner <coreference> element.
type Coreference struct {
// Mentions collects every <mention> child element.
Mentions Mentions `xml:"mention,omitempty"`
}
// Mentions is a slice of Mention pointers.
type Mentions []*Mention
// Mention mirrors one <mention> element of a coreference entry. Apart from
// Representative, every field is read from the child element named in its tag.
type Mention struct {
// Representative is read from the "representative" attribute.
Representative string `xml:"representative,attr"`
// NOTE(review): Sentence, Start, End and Head look like numeric
// references to a sentence and to token positions inside it; their
// indexing base is not visible in this file and should be confirmed.
Sentence int `xml:"sentence,omitempty"`
Start int `xml:"start,omitempty"`
End int `xml:"end,omitempty"`
Head int `xml:"head,omitempty"`
Text string `xml:"text,omitempty"`
}
// node is one constituent of a parse tree built from an S-expression such as
// "(NP (DT the) (NN cat))".
type node struct {
	// parent is the enclosing constituent. The root created by newRootNode
	// points at an empty placeholder node instead of nil.
	parent *node
	// child holds the sub-constituents in REVERSE source order: addChild
	// appends a child only after everything to its right has been parsed.
	child []*node
	// pos is the label of the constituent, e.g. "NP" or "NN".
	pos string
	// value is the word of a leaf constituent; it is empty for phrases.
	value string
}

// walkNPString prints, one per line, the word of every leaf that is a direct
// child of an NP constituent, skipping the "," and "." tokens. The whole
// subtree below n is visited.
func (n *node) walkNPString() {
	// Iterate from the back because child is stored in reverse source order;
	// this makes the output follow the order of the sentence.
	for i := len(n.child) - 1; i >= 0; i-- {
		c := n.child[i]
		isWord := c.value != "" && c.value != "," && c.value != "."
		if isWord && n.pos == "NP" && n.value == "" {
			fmt.Println(c.value)
		}
		c.walkNPString()
	}
}

// newRootNode returns a node whose parent is an empty placeholder, so that
// the ')' closing the outermost expression has a parent to hand control to.
func newRootNode() *node {
	return &node{
		parent: &node{},
	}
}

// newNode returns an empty node attached to parent. The node is not added to
// parent.child here; addChild does that once the node has been filled in.
func newNode(parent *node) *node {
	return &node{
		parent: parent,
	}
}

// parse builds a tree from the S-expression in str and returns its root.
// Surrounding whitespace is ignored. An error is returned when the input is
// empty or does not start with '('.
func parse(str string) (*node, error) {
	trimmed := strings.TrimSpace(str)
	// HasPrefix also covers the empty string, which previously panicked on
	// the str[0] access.
	if !strings.HasPrefix(trimmed, "(") {
		return nil, fmt.Errorf("Initial letter must be '('\n input string: %s", str)
	}
	return newRootNode().addChild(trimmed[1:]), nil
}

// addChild consumes str, the not yet parsed remainder of the S-expression,
// in the context of constituent n and returns n.
//
//   - '(' opens a new child of n, which then consumes the rest of the input.
//   - ')' closes n and passes the rest of the input to n's parent.
//   - anything else is read as "LABEL" or "LABEL WORD" for n itself.
//
// Because a child hands the remaining input to its parent before it is
// appended, later siblings end up earlier in n.child (reverse source order).
// Malformed input is handled leniently: surplus ')' and whatever follows
// them is ignored, and a missing final ')' is tolerated.
func (n *node) addChild(str string) *node {
	// A nil receiver is only reached when the input contains more ')' than
	// '('; there is nothing left to attach to, so drop the remainder.
	if n == nil {
		return nil
	}
	// Trim before testing for emptiness: input that ends in whitespace (for
	// example "(A b) ") would otherwise reach str[0] with an empty string.
	str = strings.TrimSpace(str)
	if len(str) == 0 {
		return n
	}
	switch str[0] {
	case '(':
		next := newNode(n)
		next = next.addChild(str[1:])
		n.child = append(n.child, next)
	case ')':
		n.parent = n.parent.addChild(str[1:])
	default:
		// The label (and optional word) runs up to the next parenthesis, or
		// to the end of the input when the closing ')' is missing.
		end := strings.IndexAny(str, "()")
		if end < 0 {
			end = len(str)
		}
		// Split on ASCII whitespace only: runs of blanks, tabs and newlines
		// separate label and word, while other Unicode spaces (such as a
		// no-break space inside a token) stay part of the word.
		fields := strings.FieldsFunc(str[:end], func(r rune) bool {
			return r == ' ' || r == '\t' || r == '\n' || r == '\r'
		})
		n.pos = fields[0]
		if len(fields) == 2 {
			n.value = fields[1]
		}
		n = n.addChild(str[end:])
	}
	return n
}
// main reads the XML file named by -file/-f, takes the sentence at index -n
// of the decoded document, parses its <parse> S-expression and prints the
// words found directly under NP constituents. It exits with status 1 when
// the file cannot be read, the requested sentence does not exist, or the
// S-expression is rejected by parse.
func main() {
	var filePath string
	var sentenceNum int
	flag.StringVar(&filePath, "file", "", "specify a file path")
	flag.StringVar(&filePath, "f", "", "specify a file path")
	flag.IntVar(&sentenceNum, "n", 1, "specify number of sentence")
	flag.Parse()

	r, err := readXML(filePath)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// Document is a pointer that stays nil when the XML has no <document>
	// element; check it before dereferencing.
	if r.Document == nil {
		fmt.Printf("no document element found in: %s\n", filePath)
		os.Exit(1)
	}

	// -n is used as a 0-based index into the sentence list; reject values
	// outside of it instead of panicking on the slice access.
	sentences := r.Document.Sentences
	if sentenceNum < 0 || sentenceNum >= len(sentences) {
		fmt.Printf("sentence index %d is out of range: the document has %d sentence(s)\n", sentenceNum, len(sentences))
		os.Exit(1)
	}

	// Named tree rather than node so the node type is not shadowed.
	tree, err := parse(sentences[sentenceNum].Parse)
	if err != nil {
		fmt.Print(err)
		os.Exit(1)
	}
	tree.walkNPString()
}
// readXML opens the file at path and decodes its XML content (the output of
// Stanford CoreNLP) into a freshly allocated Root. It returns an error when
// the file cannot be opened or when decoding fails; the file is closed before
// returning.
func readXML(path string) (*Root, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, fmt.Errorf("could not open a file: %s\n %s", path, openErr)
	}
	defer file.Close()

	var root Root
	if decodeErr := xml.NewDecoder(file).Decode(&root); decodeErr != nil {
		return nil, decodeErr
	}
	return &root, nil
}
@toVersus
Copy link
Author

toVersus commented Apr 14, 2018

Incomplete work!
Currently, this just refactors the following code:
https://github.com/cipepser/goSExpression-sample/tree/master/gose

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment