Created
June 16, 2012 14:21
-
-
Save sekia/2941456 to your computer and use it in GitHub Desktop.
マルコフ連鎖を使った文章生成サンプル ref: http://qiita.com/items/0fb1730607db04dcff49
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"errors" | |
"fmt" | |
"io" | |
"log" | |
"math/rand" | |
"os" | |
"strings" | |
"time" | |
) | |
type Morpheme struct{ surface, pos string } | |
var ( | |
BOS = Morpheme{surface: "BOS", pos: ""} | |
EOS = Morpheme{surface: "EOS", pos: ""} | |
) | |
func MakeMorphemeFromLine(line string) (morph Morpheme, err error) { | |
fields := strings.Split(line, "\t") | |
if len(fields) != 2 { | |
err = errors.New("Invalid format: \"" + line + "\"") | |
return | |
} | |
surf, rest := fields[0], fields[1] | |
feats := strings.Split(rest, ",") | |
morph = Morpheme{surface: surf, pos: feats[0]} | |
return | |
} | |
type MarkovState int | |
const INITIAL_STATE MarkovState = 0 | |
type MarkovChain struct { | |
currentState MarkovState | |
morphemes []Morpheme | |
chain [][]float64 | |
states map[Morpheme]MarkovState | |
} | |
func MakeMarkovChain(morphs []Morpheme) *MarkovChain { | |
states := map[Morpheme]MarkovState{} | |
nextState := INITIAL_STATE | |
uniqueMorphs := make([]Morpheme, 0) | |
for _, morph := range morphs { | |
if _, ok := states[morph]; !ok { | |
states[morph] = nextState | |
uniqueMorphs = append(uniqueMorphs, morph) | |
nextState++ | |
} | |
} // Now |nextState| holds the number of kinds of morphemes. | |
// Initialize transition count/probability tables. | |
numMorphKinds := int(nextState) | |
transitionCount := make([][]int, numMorphKinds) | |
chain := make([][]float64, numMorphKinds) | |
for i := 0; i < numMorphKinds; i++ { | |
transitionCount[i] = make([]int, numMorphKinds) | |
chain[i] = make([]float64, numMorphKinds) | |
} | |
prevMorph := morphs[0] | |
for i := 1; i < len(morphs); i++ { | |
currMorph := morphs[i] | |
transitionCount[states[prevMorph]][states[currMorph]]++ | |
if currMorph == EOS { | |
i++ | |
if i >= len(morphs) { | |
break | |
} | |
prevMorph = morphs[i] // |morphs[i]| must point BOS. | |
continue | |
} | |
prevMorph = currMorph | |
} | |
for i := 0; i < numMorphKinds; i++ { | |
totalCount := 0 | |
for j := 0; j < numMorphKinds; j++ { | |
totalCount += transitionCount[i][j] | |
} | |
if totalCount == 0 { | |
continue | |
} | |
for j := 0; j < numMorphKinds; j++ { | |
chain[i][j] = float64(transitionCount[i][j]) / float64(totalCount) | |
} | |
} | |
return &MarkovChain{ | |
currentState: INITIAL_STATE, | |
morphemes: uniqueMorphs, | |
chain: chain, | |
states: states, | |
} | |
} | |
func (chain MarkovChain) nextState(state MarkovState) (nextState MarkovState, err error) { | |
if int(state) < 0 || int(state) >= len(chain.morphemes) { | |
err = errors.New("Invalid state.") | |
return | |
} | |
if state == chain.states[EOS] { | |
err = errors.New("No next state.") | |
return | |
} | |
r := rand.Float64() | |
for i, p := range chain.chain[state] { | |
if r < p { | |
nextState = MarkovState(i) | |
return | |
} else { | |
r -= p | |
} | |
} | |
panic("Cannot reach here.") | |
} | |
func (chain MarkovChain) morphemeFor(state MarkovState) (morph Morpheme, err error) { | |
if int(state) < 0 || int(state) >= len(chain.morphemes) { | |
err = errors.New("Invalid state.") | |
return | |
} | |
morph = chain.morphemes[state] | |
return | |
} | |
func (chain *MarkovChain) NextMorpheme() (morph Morpheme) { | |
chain.currentState, _ = chain.nextState(chain.currentState) | |
morph, _ = chain.morphemeFor(chain.currentState) | |
return | |
} | |
func (chain *MarkovChain) GenerateSentence() (sent string) { | |
chain.currentState = INITIAL_STATE | |
for morph := chain.NextMorpheme(); morph != EOS; morph = chain.NextMorpheme() { | |
sent += morph.surface | |
} | |
return | |
} | |
func main() { | |
in := bufio.NewReader(os.Stdin) | |
morphs := make([]Morpheme, 0) | |
for { | |
lineBytes, _, err := in.ReadLine() | |
if err == io.EOF { | |
break | |
} | |
if len(lineBytes) == 0 { | |
continue | |
} | |
morphs = append(morphs, BOS) | |
for { | |
if err != nil { | |
log.Fatal(err) | |
os.Exit(-1) | |
} | |
line := string(lineBytes) | |
if line == "EOS" { | |
break | |
} | |
if morph, err := MakeMorphemeFromLine(line); err == nil { | |
morphs = append(morphs, morph) | |
} else { | |
log.Fatal(err) | |
os.Exit(-1) | |
} | |
lineBytes, _, err = in.ReadLine() | |
} | |
morphs = append(morphs, EOS) | |
} | |
chain := MakeMarkovChain(morphs) | |
rand.Seed(time.Now().Unix()) | |
fmt.Println(chain.GenerateSentence()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment