Skip to content

Instantly share code, notes, and snippets.

@cipepser
Last active March 22, 2017 12:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cipepser/c6be3f30935d9f1032cb8dd29b3a6f8e to your computer and use it in GitHub Desktop.
Save cipepser/c6be3f30935d9f1032cb8dd29b3a6f8e to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"os"
"io"
"bufio"
"strings"
)
// main loads the MeCab output stored at ../data/neko.txt.mecab, groups its
// morphemes into sentences and prints the second sentence (index 1).
//
// It panics if the file cannot be opened or read, if a line is malformed,
// or if the input holds fewer than two sentences.
func main() {
	f, err := os.Open("../data/neko.txt.mecab")
	if err != nil {
		panic(err)
	}
	// Registered only after the error check so Close never runs on a failed open.
	defer f.Close()

	sents, err := readSentences(f)
	if err != nil {
		panic(err)
	}
	if len(sents) < 2 {
		panic(fmt.Sprintf("expected at least 2 sentences, got %d", len(sents)))
	}
	fmt.Println(sents[1])
}

// readSentences parses MeCab output from r into sentences, each sentence being
// the list of morpheme maps found between "EOS" marker lines.
//
// Consecutive "EOS" lines do not produce empty sentences, blank lines are
// skipped, and morphemes after the last "EOS" are kept as a final sentence.
// It returns an error for read failures and for malformed morpheme lines.
func readSentences(r io.Reader) ([][]map[string]string, error) {
	br := bufio.NewReader(r)
	var sents [][]map[string]string
	var sent []map[string]string

	for lineNo := 1; ; lineNo++ {
		// ReadString always yields the complete line; ReadLine may return only
		// a fragment when the line exceeds the reader's buffer.
		raw, err := br.ReadString('\n')
		if err != nil && err != io.EOF {
			return nil, fmt.Errorf("reading line %d: %w", lineNo, err)
		}
		line := strings.TrimSuffix(strings.TrimSuffix(raw, "\n"), "\r")

		switch line {
		case "":
			// Nothing to parse (blank line, or the empty read at end of input).
		case "EOS":
			if len(sent) > 0 { // ignore repeated "EOS" markers
				sents = append(sents, sent)
				sent = nil
			}
		default:
			morpheme, perr := parseMorpheme(line)
			if perr != nil {
				return nil, fmt.Errorf("line %d: %w", lineNo, perr)
			}
			sent = append(sent, morpheme)
		}

		// On io.EOF the data returned alongside it has already been handled above.
		if err == io.EOF {
			break
		}
	}

	// Keep a trailing sentence that was not terminated by "EOS".
	if len(sent) > 0 {
		sents = append(sents, sent)
	}
	return sents, nil
}

// parseMorpheme converts one "surface<TAB>feature,feature,..." line into a map
// with the keys "surface", "base", "pos" and "pos1".
//
// "pos" and "pos1" are the first and second comma-separated features and
// "base" is the seventh. It returns an error when the tab separator is missing
// or fewer than seven features are present.
func parseMorpheme(line string) (map[string]string, error) {
	cols := strings.Split(line, "\t")
	if len(cols) < 2 {
		return nil, fmt.Errorf("malformed line %q: missing tab separator", line)
	}
	feats := strings.Split(cols[1], ",")
	if len(feats) < 7 {
		return nil, fmt.Errorf("malformed line %q: want at least 7 features, got %d", line, len(feats))
	}
	return map[string]string{
		"surface": cols[0],
		"base":    feats[6],
		"pos":     feats[0],
		"pos1":    feats[1],
	}, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment