Skip to content

Instantly share code, notes, and snippets.

@gologo13
Created August 8, 2010 04:59
Show Gist options
  • Save gologo13/513610 to your computer and use it in GitHub Desktop.
Save gologo13/513610 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
use strict;
use feature qw( say );
# Inspired by http://d.hatena.ne.jp/syou6162/20100708/1278577199
# state-emission HMM
# 状態遷移確率
# 品詞s_iから品詞s_jへ遷移する確率
my @a = ([0.3, 0.0, 0.4, 0.1, 0.2],
[0.7, 0.0, 0.0, 0.3, 0.0],
[0.3, 0.2, 0.1, 0.2, 0.2],
[0.5, 0.1, 0.0, 0.4, 0.0],
[0.6, 0.3, 0.0, 0.1, 0.0]);
# 記号生成確率
# 品詞s_iから単語w_jを生成する確率
# 単語は観測された順
my @b = ([0.6, 0.1, 0.0, 0.0, 0.3],
[0.0, 0.0, 0.0, 1.0, 0.0],
[0.1, 0.2, 0.7, 0.0, 0.0],
[0.0, 0.0, 1.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 0.0, 0.0]);
# 観測系列
my @o = (0, 1, 2, 3, 4);
# 品詞の初期状態の確率
my @pi = (0.6, 0.4, 0.0, 0.0, 0.0);
say forward(\@a, \@b, \@o, \@pi);
say backward(\@a, \@b, \@o, \@pi);
say viterbi(\@a, \@b, \@o, \@pi);
# Forward Algorithm
sub forward {
(@_ == 4) || die $!;
my ($a, $b, $o, $pi) = @_;
my @a = @$a;
my @b = @$b;
my @o = @$o;
my @pi = @$pi;
my $T = $#o+1; # total time
my $N = $#pi+1; # the number of the states
# initial step (t = 0)
my @alpha;
for my $i (0 .. $N-1) {
$alpha[0][$i] = $pi[$i] * $b[$i][0];
}
# inductive step (t = 1 ~ $T-1)
for my $t (1 .. $T-1) {
for my $i (0 .. $N-1) {
my $sum = 0;
for my $j (0 .. $N-1) {
$sum += $alpha[$t-1][$j] * $a[$j][$i];
}
$alpha[$t][$i] = $sum * $b[$i][$t];
}
}
# final step
my $prob = 0;
for my $i (0 .. $N-1) {
$prob += $alpha[$T-1][$i];
}
return $prob;
}
sub backward {
(@_ == 4) || die $!;
my ($a, $b, $o, $pi) = @_;
my @a = @$a;
my @b = @$b;
my @o = @$o;
my @pi = @$pi;
my $T = $#o+1; # total time
my $N = $#pi+1; # the number of the states
# initial step (t = $T-1)
my @beta;
for my $i (0 .. $N-1) {
$beta[$T-1][$i] = $b[$i][$T-1];
}
# indeuctive (t = $T-2 .. 0)
for my $t (reverse 0 .. $T-2) {
for my $i (0 .. $N-1) {
my $sum = 0;
for my $j (0 .. $N-1) {
$sum += $beta[$t+1][$j] * $a[$i][$j];
}
$beta[$t][$i] = $sum * $b[$i][$t];
}
}
# final step
my $prob = 0;
for my $i (0 .. $N-1) {
$prob += $beta[0][$i] * $pi[$i];
}
return $prob;
}
# Viterbi Algorithm
sub viterbi {
(@_ == 4) || die $!;
my ($a, $b, $o, $pi) = @_;
my @a = @$a;
my @b = @$b;
my @o = @$o;
my @pi = @$pi;
my $T = $#o; # total time
my $N = $#pi+1; # the number of the states
warn "\$T = $T, \$N = $N\n";
# initial step (t = 0)
my (@delta, @psi);
for my $i (0 .. $N-1) {
$delta[0][$i] = $pi[$i] * $b[$i][0];
$psi[0][$i] = 0;
}
# inductive step (t = 1 .. $T)
for my $t (1 .. $T) {
for my $i (0 .. $N-1) {
my $max = 0;
for my $j (0 .. $N-1) {
my $prob = $delta[$t-1][$j] * $a[$j][$i];
if ($max < $prob) {
$max = $prob;
$psi[$t][$i] = $j;
}
}
$delta[$t][$i] = $max * $b[$i][$t];
}
}
# final step
# 時刻Tにおいて最尤な状態を探索
my ($max_p_T, $q_T) = (0, 0);
for my $i (0 .. $N-1) {
if ($max_p_T < $delta[$T][$i]) {
$max_p_T = $delta[$T][$i];
$q_T = $i;
}
}
# 最尤な状態 $q_T からバックポインタをたどる
my @max_q;
$max_q[$T] = $q_T;
# 番号と品詞の対応付け: 0(名詞)、1(冠詞)、2(動詞)、3(形容詞)、4(前置詞)
my @S = qw/名詞 冠詞 動詞 形容詞 前置詞/;
my @W = qw/Time flies like an arrow/;
for my $t (reverse 0 .. $T) {
$max_q[$t] = $psi[$t+1][$max_q[$t+1]];
}
print "$W[$_]/$S[$max_q[$_]] " for (0 .. $T);
return $max_p_T;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment