Skip to content

Instantly share code, notes, and snippets.

@cipepser
Created August 20, 2017 09:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cipepser/01e16fef3dd5e2f567fa7b63896e239d to your computer and use it in GitHub Desktop.
Save cipepser/01e16fef3dd5e2f567fa7b63896e239d to your computer and use it in GitHub Desktop.
package main
import (
"encoding/xml"
"fmt"
"os"
)
// CharacterOffsetBegin maps the <CharacterOffsetBegin> child element of a
// token; Text receives the element's character data as a raw string.
type CharacterOffsetBegin struct {
Text string `xml:",chardata" json:",omitempty"`
}
// CharacterOffsetEnd maps the <CharacterOffsetEnd> child element of a token;
// Text receives the element's character data as a raw string.
type CharacterOffsetEnd struct {
Text string `xml:",chardata" json:",omitempty"`
}
// ChidleyRoot314159 is a wrapper whose only field is the <root> element.
// NOTE(review): the name suggests it was emitted by a code generator; it is
// not referenced by the code visible in this file (main decodes into Root).
type ChidleyRoot314159 struct {
Root *Root `xml:" root,omitempty" json:"root,omitempty"`
}
// NER maps the <NER> child element of a token; Text receives the element's
// character data (presumably a named-entity label — confirm against the data).
type NER struct {
Text string `xml:",chardata" json:",omitempty"`
}
// NormalizedNER maps the <NormalizedNER> child element of a token; Text
// receives the element's character data.
type NormalizedNER struct {
Text string `xml:",chardata" json:",omitempty"`
}
// POS maps the <POS> child element of a token; Text receives the element's
// character data (presumably a part-of-speech tag — confirm against the data).
type POS struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Speaker maps the <Speaker> child element of a token; Text receives the
// element's character data.
type Speaker struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Timex maps the <Timex> child element of a token.
type Timex struct {
// AttrTid is the value of the "tid" attribute.
AttrTid string `xml:" tid,attr" json:",omitempty"`
// AttrType is the value of the "type" attribute.
AttrType string `xml:" type,attr" json:",omitempty"`
// Text is the element's character data.
Text string `xml:",chardata" json:",omitempty"`
}
// Coreference maps a <coreference> element. The type is recursive: a
// <coreference> may contain a nested <coreference> as well as any number of
// <mention> children.
type Coreference struct {
// Coreference is the nested <coreference> child, nil when absent.
Coreference *Coreference `xml:" coreference,omitempty" json:"coreference,omitempty"`
// Mention collects every <mention> child in document order.
Mention []*Mention `xml:" mention,omitempty" json:"mention,omitempty"`
}
// Dep maps a single <dep> element: one dependency relation between a
// governor and a dependent.
type Dep struct {
// AttrExtra is the value of the "extra" attribute.
AttrExtra string `xml:" extra,attr" json:",omitempty"`
// AttrType is the value of the "type" attribute, i.e. the relation name
// (main looks for "nsubj" and "dobj").
AttrType string `xml:" type,attr" json:",omitempty"`
// Dependent is the <dependent> child, nil when the element is absent.
Dependent *Dependent `xml:" dependent,omitempty" json:"dependent,omitempty"`
// Governor is the <governor> child, nil when the element is absent.
Governor *Governor `xml:" governor,omitempty" json:"governor,omitempty"`
}
// Dependencies maps a <dependencies> element: a typed list of <dep> relations
// belonging to one sentence.
type Dependencies struct {
// AttrType is the value of the "type" attribute (main selects the list
// whose type is "collapsed-dependencies").
AttrType string `xml:" type,attr" json:",omitempty"`
// Dep collects every <dep> child in document order.
Dep []*Dep `xml:" dep,omitempty" json:"dep,omitempty"`
}
// Dependent maps the <dependent> child of a <dep> element.
type Dependent struct {
// AttrCopy is the value of the "copy" attribute.
AttrCopy string `xml:" copy,attr" json:",omitempty"`
// AttrIdx is the value of the "idx" attribute, kept as a raw string.
AttrIdx string `xml:" idx,attr" json:",omitempty"`
// Text is the element's character data.
Text string `xml:",chardata" json:",omitempty"`
}
// Document maps the <document> element, which holds the sentence list and the
// coreference information.
type Document struct {
// Coreference is the <coreference> child, nil when absent.
Coreference *Coreference `xml:" coreference,omitempty" json:"coreference,omitempty"`
// Sentences is the <sentences> child, nil when absent.
Sentences *Sentences `xml:" sentences,omitempty" json:"sentences,omitempty"`
}
// End maps the <end> child element of a mention; Text receives the element's
// character data as a raw string.
type End struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Governor maps the <governor> child of a <dep> element.
type Governor struct {
// AttrCopy is the value of the "copy" attribute.
AttrCopy string `xml:" copy,attr" json:",omitempty"`
// AttrIdx is the value of the "idx" attribute, kept as a raw string.
AttrIdx string `xml:" idx,attr" json:",omitempty"`
// Text is the element's character data.
Text string `xml:",chardata" json:",omitempty"`
}
// Head maps the <head> child element of a mention; Text receives the
// element's character data as a raw string.
type Head struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Lemma maps the <lemma> child element of a token; Text receives the
// element's character data.
type Lemma struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Mention maps a <mention> element inside a <coreference>. Every child is
// optional and is left nil (or empty) when the element is absent.
type Mention struct {
// AttrRepresentative is the value of the "representative" attribute.
AttrRepresentative string `xml:" representative,attr" json:",omitempty"`
// End is the <end> child.
End *End `xml:" end,omitempty" json:"end,omitempty"`
// Head is the <head> child.
Head *Head `xml:" head,omitempty" json:"head,omitempty"`
// Sentence collects the <sentence> children. It reuses the Sentence type,
// so the character data of each child lands in Sentence.Text.
Sentence []*Sentence `xml:" sentence,omitempty" json:"sentence,omitempty"`
// Start is the <start> child.
Start *Start `xml:" start,omitempty" json:"start,omitempty"`
// Text is the <text> child.
Text *Text `xml:" text,omitempty" json:"text,omitempty"`
}
// Parse maps the <parse> child element of a sentence; Text receives the
// element's character data.
type Parse struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Root is the decode target used by main for the top-level XML element. It
// declares no XMLName field, so it does not constrain that element's name.
type Root struct {
// Document is the <document> child, nil when absent.
Document *Document `xml:" document,omitempty" json:"document,omitempty"`
}
// Sentence maps a <sentence> element. The same type is used both for the
// entries of <sentences> and for the <sentence> children of a <mention>.
type Sentence struct {
// AttrId is the value of the "id" attribute, kept as a raw string.
AttrId string `xml:" id,attr" json:",omitempty"`
// Dependencies collects every <dependencies> child in document order.
Dependencies []*Dependencies `xml:" dependencies,omitempty" json:"dependencies,omitempty"`
// Parse is the <parse> child, nil when absent.
Parse *Parse `xml:" parse,omitempty" json:"parse,omitempty"`
// Text is the character data found directly inside <sentence>.
Text string `xml:",chardata" json:",omitempty"`
// Tokens is the <tokens> child, nil when absent.
Tokens *Tokens `xml:" tokens,omitempty" json:"tokens,omitempty"`
}
// Sentences maps the <sentences> element, the container of all <sentence>
// entries of a document.
type Sentences struct {
// Sentence collects every <sentence> child in document order.
Sentence []*Sentence `xml:" sentence,omitempty" json:"sentence,omitempty"`
}
// Start maps the <start> child element of a mention; Text receives the
// element's character data as a raw string.
type Start struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Text maps the <text> child element of a mention; its Text field receives
// the element's character data.
type Text struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Token maps a <token> element. Each annotation is a separate optional child
// element; the corresponding field is nil when that child is absent.
type Token struct {
// AttrId is the value of the "id" attribute, kept as a raw string.
AttrId string `xml:" id,attr" json:",omitempty"`
// CharacterOffsetBegin is the <CharacterOffsetBegin> child.
CharacterOffsetBegin *CharacterOffsetBegin `xml:" CharacterOffsetBegin,omitempty" json:"CharacterOffsetBegin,omitempty"`
// CharacterOffsetEnd is the <CharacterOffsetEnd> child.
CharacterOffsetEnd *CharacterOffsetEnd `xml:" CharacterOffsetEnd,omitempty" json:"CharacterOffsetEnd,omitempty"`
// Lemma is the <lemma> child.
Lemma *Lemma `xml:" lemma,omitempty" json:"lemma,omitempty"`
// NER is the <NER> child.
NER *NER `xml:" NER,omitempty" json:"NER,omitempty"`
// NormalizedNER is the <NormalizedNER> child.
NormalizedNER *NormalizedNER `xml:" NormalizedNER,omitempty" json:"NormalizedNER,omitempty"`
// POS is the <POS> child.
POS *POS `xml:" POS,omitempty" json:"POS,omitempty"`
// Speaker is the <Speaker> child.
Speaker *Speaker `xml:" Speaker,omitempty" json:"Speaker,omitempty"`
// Timex is the <Timex> child.
Timex *Timex `xml:" Timex,omitempty" json:"Timex,omitempty"`
// Word is the <word> child.
Word *Word `xml:" word,omitempty" json:"word,omitempty"`
}
// Tokens maps the <tokens> element, the container of a sentence's <token>
// entries.
type Tokens struct {
// Token collects every <token> child in document order.
Token []*Token `xml:" token,omitempty" json:"token,omitempty"`
}
// Word maps the <word> child element of a token; Text receives the element's
// character data.
type Word struct {
Text string `xml:",chardata" json:",omitempty"`
}
// Tuple accumulates the dependents attached to one governor while main scans
// a dependency list: nsubj holds the dependent texts of "nsubj" relations and
// dobj those of "dobj" relations. A nil slice means no such relation was seen.
type Tuple struct {
nsubj, dobj []string
}
// main decodes the XML file at ../data/nlp.txt.xml and, for each sentence's
// "collapsed-dependencies" list, prints one tab-separated line
//
//	subject<TAB>predicate<TAB>object
//
// for every (nsubj, dobj) pair of dependents that share the same governor.
// Governors are reported in the order they first appear in the dependency
// list, so the output is reproducible across runs.
//
// It panics if the file cannot be opened or the XML cannot be decoded.
func main() {
	f, err := os.Open("../data/nlp.txt.xml")
	if err != nil {
		panic(err)
	}
	// Deferred only after the error check so Close is never called on a nil
	// *os.File when Open fails.
	defer f.Close()

	r := &Root{}
	if err := xml.NewDecoder(f).Decode(r); err != nil {
		panic(err)
	}
	// Document and Sentences are optional pointers; a file without them has
	// nothing to report and must not cause a nil dereference.
	if r.Document == nil || r.Document.Sentences == nil {
		return
	}

	// governorKey identifies one governor token within a sentence. Keying on
	// the token index as well as its text keeps two occurrences of the same
	// word in a sentence from being merged into a single predicate.
	type governorKey struct{ idx, text string }

	for _, s := range r.Document.Sentences.Sentence {
		for _, d := range s.Dependencies {
			if d.AttrType != "collapsed-dependencies" {
				continue
			}

			tuples := make(map[governorKey]Tuple)
			// order records governors by first appearance because map
			// iteration order is unspecified.
			var order []governorKey

			for _, dep := range d.Dep {
				if dep.AttrType != "nsubj" && dep.AttrType != "dobj" {
					continue
				}
				// Both children are optional in the schema; skip malformed
				// relations instead of dereferencing nil.
				if dep.Governor == nil || dep.Dependent == nil {
					continue
				}

				k := governorKey{idx: dep.Governor.AttrIdx, text: dep.Governor.Text}
				t, seen := tuples[k]
				if !seen {
					order = append(order, k)
				}
				if dep.AttrType == "nsubj" {
					t.nsubj = append(t.nsubj, dep.Dependent.Text)
				} else {
					t.dobj = append(t.dobj, dep.Dependent.Text)
				}
				tuples[k] = t
			}

			for _, k := range order {
				t := tuples[k]
				// The nested loops emit nothing unless the governor has at
				// least one subject and at least one object.
				for _, dobj := range t.dobj {
					for _, nsubj := range t.nsubj {
						// Printf rather than Println: Println puts a space
						// between operands, which would pad every tab.
						fmt.Printf("%s\t%s\t%s\n", nsubj, k.text, dobj)
					}
				}
			}
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment