Created
August 8, 2010 04:59
-
-
Save gologo13/513610 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use feature qw( say ); | |
# Inspired by http://d.hatena.ne.jp/syou6162/20100708/1278577199 | |
# state-emission HMM | |
# 状態遷移確率 | |
# 品詞s_iから品詞s_jへ遷移する確率 | |
my @a = ([0.3, 0.0, 0.4, 0.1, 0.2], | |
[0.7, 0.0, 0.0, 0.3, 0.0], | |
[0.3, 0.2, 0.1, 0.2, 0.2], | |
[0.5, 0.1, 0.0, 0.4, 0.0], | |
[0.6, 0.3, 0.0, 0.1, 0.0]); | |
# 記号生成確率 | |
# 品詞s_iから単語w_jを生成する確率 | |
# 単語は観測された順 | |
my @b = ([0.6, 0.1, 0.0, 0.0, 0.3], | |
[0.0, 0.0, 0.0, 1.0, 0.0], | |
[0.1, 0.2, 0.7, 0.0, 0.0], | |
[0.0, 0.0, 1.0, 0.0, 0.0], | |
[0.0, 0.0, 1.0, 0.0, 0.0]); | |
# 観測系列 | |
my @o = (0, 1, 2, 3, 4); | |
# 品詞の初期状態の確率 | |
my @pi = (0.6, 0.4, 0.0, 0.0, 0.0); | |
say forward(\@a, \@b, \@o, \@pi); | |
say backward(\@a, \@b, \@o, \@pi); | |
say viterbi(\@a, \@b, \@o, \@pi); | |
# Forward Algorithm | |
sub forward { | |
(@_ == 4) || die $!; | |
my ($a, $b, $o, $pi) = @_; | |
my @a = @$a; | |
my @b = @$b; | |
my @o = @$o; | |
my @pi = @$pi; | |
my $T = $#o+1; # total time | |
my $N = $#pi+1; # the number of the states | |
# initial step (t = 0) | |
my @alpha; | |
for my $i (0 .. $N-1) { | |
$alpha[0][$i] = $pi[$i] * $b[$i][0]; | |
} | |
# inductive step (t = 1 ~ $T-1) | |
for my $t (1 .. $T-1) { | |
for my $i (0 .. $N-1) { | |
my $sum = 0; | |
for my $j (0 .. $N-1) { | |
$sum += $alpha[$t-1][$j] * $a[$j][$i]; | |
} | |
$alpha[$t][$i] = $sum * $b[$i][$t]; | |
} | |
} | |
# final step | |
my $prob = 0; | |
for my $i (0 .. $N-1) { | |
$prob += $alpha[$T-1][$i]; | |
} | |
return $prob; | |
} | |
sub backward { | |
(@_ == 4) || die $!; | |
my ($a, $b, $o, $pi) = @_; | |
my @a = @$a; | |
my @b = @$b; | |
my @o = @$o; | |
my @pi = @$pi; | |
my $T = $#o+1; # total time | |
my $N = $#pi+1; # the number of the states | |
# initial step (t = $T-1) | |
my @beta; | |
for my $i (0 .. $N-1) { | |
$beta[$T-1][$i] = $b[$i][$T-1]; | |
} | |
# indeuctive (t = $T-2 .. 0) | |
for my $t (reverse 0 .. $T-2) { | |
for my $i (0 .. $N-1) { | |
my $sum = 0; | |
for my $j (0 .. $N-1) { | |
$sum += $beta[$t+1][$j] * $a[$i][$j]; | |
} | |
$beta[$t][$i] = $sum * $b[$i][$t]; | |
} | |
} | |
# final step | |
my $prob = 0; | |
for my $i (0 .. $N-1) { | |
$prob += $beta[0][$i] * $pi[$i]; | |
} | |
return $prob; | |
} | |
# Viterbi Algorithm | |
sub viterbi { | |
(@_ == 4) || die $!; | |
my ($a, $b, $o, $pi) = @_; | |
my @a = @$a; | |
my @b = @$b; | |
my @o = @$o; | |
my @pi = @$pi; | |
my $T = $#o; # total time | |
my $N = $#pi+1; # the number of the states | |
warn "\$T = $T, \$N = $N\n"; | |
# initial step (t = 0) | |
my (@delta, @psi); | |
for my $i (0 .. $N-1) { | |
$delta[0][$i] = $pi[$i] * $b[$i][0]; | |
$psi[0][$i] = 0; | |
} | |
# inductive step (t = 1 .. $T) | |
for my $t (1 .. $T) { | |
for my $i (0 .. $N-1) { | |
my $max = 0; | |
for my $j (0 .. $N-1) { | |
my $prob = $delta[$t-1][$j] * $a[$j][$i]; | |
if ($max < $prob) { | |
$max = $prob; | |
$psi[$t][$i] = $j; | |
} | |
} | |
$delta[$t][$i] = $max * $b[$i][$t]; | |
} | |
} | |
# final step | |
# 時刻Tにおいて最尤な状態を探索 | |
my ($max_p_T, $q_T) = (0, 0); | |
for my $i (0 .. $N-1) { | |
if ($max_p_T < $delta[$T][$i]) { | |
$max_p_T = $delta[$T][$i]; | |
$q_T = $i; | |
} | |
} | |
# 最尤な状態 $q_T からバックポインタをたどる | |
my @max_q; | |
$max_q[$T] = $q_T; | |
# 番号と品詞の対応付け: 0(名詞)、1(冠詞)、2(動詞)、3(形容詞)、4(前置詞) | |
my @S = qw/名詞 冠詞 動詞 形容詞 前置詞/; | |
my @W = qw/Time flies like an arrow/; | |
for my $t (reverse 0 .. $T) { | |
$max_q[$t] = $psi[$t+1][$max_q[$t+1]]; | |
} | |
print "$W[$_]/$S[$max_q[$_]] " for (0 .. $T); | |
return $max_p_T; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment