Skip to content

Instantly share code, notes, and snippets.

@sekia
Created June 16, 2012 14:21
Show Gist options
  • Save sekia/2941456 to your computer and use it in GitHub Desktop.
Save sekia/2941456 to your computer and use it in GitHub Desktop.
マルコフ連鎖を使った文章生成サンプル ref: http://qiita.com/items/0fb1730607db04dcff49
package main
import (
"bufio"
"errors"
"fmt"
"io"
"log"
"math/rand"
"os"
"strings"
"time"
)
type Morpheme struct{ surface, pos string }
var (
BOS = Morpheme{surface: "BOS", pos: ""}
EOS = Morpheme{surface: "EOS", pos: ""}
)
func MakeMorphemeFromLine(line string) (morph Morpheme, err error) {
fields := strings.Split(line, "\t")
if len(fields) != 2 {
err = errors.New("Invalid format: \"" + line + "\"")
return
}
surf, rest := fields[0], fields[1]
feats := strings.Split(rest, ",")
morph = Morpheme{surface: surf, pos: feats[0]}
return
}
type MarkovState int
const INITIAL_STATE MarkovState = 0
type MarkovChain struct {
currentState MarkovState
morphemes []Morpheme
chain [][]float64
states map[Morpheme]MarkovState
}
func MakeMarkovChain(morphs []Morpheme) *MarkovChain {
states := map[Morpheme]MarkovState{}
nextState := INITIAL_STATE
uniqueMorphs := make([]Morpheme, 0)
for _, morph := range morphs {
if _, ok := states[morph]; !ok {
states[morph] = nextState
uniqueMorphs = append(uniqueMorphs, morph)
nextState++
}
} // Now |nextState| holds the number of kinds of morphemes.
// Initialize transition count/probability tables.
numMorphKinds := int(nextState)
transitionCount := make([][]int, numMorphKinds)
chain := make([][]float64, numMorphKinds)
for i := 0; i < numMorphKinds; i++ {
transitionCount[i] = make([]int, numMorphKinds)
chain[i] = make([]float64, numMorphKinds)
}
prevMorph := morphs[0]
for i := 1; i < len(morphs); i++ {
currMorph := morphs[i]
transitionCount[states[prevMorph]][states[currMorph]]++
if currMorph == EOS {
i++
if i >= len(morphs) {
break
}
prevMorph = morphs[i] // |morphs[i]| must point BOS.
continue
}
prevMorph = currMorph
}
for i := 0; i < numMorphKinds; i++ {
totalCount := 0
for j := 0; j < numMorphKinds; j++ {
totalCount += transitionCount[i][j]
}
if totalCount == 0 {
continue
}
for j := 0; j < numMorphKinds; j++ {
chain[i][j] = float64(transitionCount[i][j]) / float64(totalCount)
}
}
return &MarkovChain{
currentState: INITIAL_STATE,
morphemes: uniqueMorphs,
chain: chain,
states: states,
}
}
func (chain MarkovChain) nextState(state MarkovState) (nextState MarkovState, err error) {
if int(state) < 0 || int(state) >= len(chain.morphemes) {
err = errors.New("Invalid state.")
return
}
if state == chain.states[EOS] {
err = errors.New("No next state.")
return
}
r := rand.Float64()
for i, p := range chain.chain[state] {
if r < p {
nextState = MarkovState(i)
return
} else {
r -= p
}
}
panic("Cannot reach here.")
}
func (chain MarkovChain) morphemeFor(state MarkovState) (morph Morpheme, err error) {
if int(state) < 0 || int(state) >= len(chain.morphemes) {
err = errors.New("Invalid state.")
return
}
morph = chain.morphemes[state]
return
}
func (chain *MarkovChain) NextMorpheme() (morph Morpheme) {
chain.currentState, _ = chain.nextState(chain.currentState)
morph, _ = chain.morphemeFor(chain.currentState)
return
}
func (chain *MarkovChain) GenerateSentence() (sent string) {
chain.currentState = INITIAL_STATE
for morph := chain.NextMorpheme(); morph != EOS; morph = chain.NextMorpheme() {
sent += morph.surface
}
return
}
func main() {
in := bufio.NewReader(os.Stdin)
morphs := make([]Morpheme, 0)
for {
lineBytes, _, err := in.ReadLine()
if err == io.EOF {
break
}
if len(lineBytes) == 0 {
continue
}
morphs = append(morphs, BOS)
for {
if err != nil {
log.Fatal(err)
os.Exit(-1)
}
line := string(lineBytes)
if line == "EOS" {
break
}
if morph, err := MakeMorphemeFromLine(line); err == nil {
morphs = append(morphs, morph)
} else {
log.Fatal(err)
os.Exit(-1)
}
lineBytes, _, err = in.ReadLine()
}
morphs = append(morphs, EOS)
}
chain := MakeMarkovChain(morphs)
rand.Seed(time.Now().Unix())
fmt.Println(chain.GenerateSentence())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment