Skip to content

Instantly share code, notes, and snippets.

@fuba
Created January 2, 2013 16:24
Show Gist options
  • Save fuba/4435809 to your computer and use it in GitHub Desktop.
Save fuba/4435809 to your computer and use it in GitHub Desktop.
しりとり
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Encode;
use YAML::Syck;
use TokyoCabinet;
use Text::KyTea;
use Unicode::Japanese;
use JSON;
# Filesystem locations used by the game.
#   DIC - dictionary source file; shiritori_mkdic() reads it as EUC-JP text
#         and extracts headwords and their pronunciations from it.
#   DB  - Tokyo Cabinet hash database built from DIC; it is created on first
#         use by shiritori() when the file does not exist yet.
use constant {
DIC => $ENV{HOME}.'/src/ipadic-2.7.0/Noun.dic',
DB => $ENV{HOME}.'/.iepg/shiritori_dic.tch',
};
# Play one turn of shiritori against the given word.
#
# Arguments:
#   $word - the opponent's word (first positional argument).
#
# Returns a hash reference:
#   { result => 0 }   when no (or a false) word was supplied, i.e. the
#                     opponent is at fault;
#   { result => -1 }  when do_shiritori() produced no answer;
#   otherwise whatever hash reference do_shiritori() returned.
#
# Side effects: emits debug output via warn, and builds the word database
# from DIC when the DB file does not exist yet.
sub shiritori {
    my ($word) = @_;

    # Nothing to answer: the opponent is at fault.
    return +{ result => 0 } unless $word;

    if (-e DB) {
        warn 'there is db';
        warn $word;
    }
    else {
        warn $word;
        # First run: generate the database from the dictionary source.
        shiritori_mkdic(DB, DIC);
    }

    my $answer = do_shiritori($word, DB);
    return $answer ? $answer : +{ result => -1 };
}
# Answer a word with a randomly chosen dictionary entry whose reading starts
# with the last character of the word's reading.
#
# Arguments:
#   $word - the opponent's word; it is handed to Text::KyTea as-is.
#   $db   - path of the Tokyo Cabinet hash database written by
#           shiritori_mkdic().
#
# Returns:
#   { result => 2 }  when the reading ends in 'ン' (the opponent broke the
#                    rule, so this side wins);
#   nothing (undef in scalar context) when no reading could be derived or no
#                    candidate exists for the last character (this side loses);
#   otherwise one stored entry (surface, yomi, result) with the normalised
#   reading of the opponent's word added under q_yomi.
#
# Dies when the database cannot be opened for reading.
sub do_shiritori {
    my ($word, $db) = @_;

    # Derive the reading before touching the database, so that no handle is
    # left open on the early-return paths below.
    my $kytea     = Text::KyTea->new;
    my $yomi_hira = '';
    for my $token (@{ $kytea->parse($word) }) {
        # Tag index 1 is taken to be the reading, as the original code did
        # -- NOTE(review): this depends on the KyTea model in use; confirm.
        my $reading = $token->{tags}->[1]->[0]->{feature};
        next unless defined $reading;    # token without a reading tag
        $yomi_hira .= decode_utf8($reading);
    }

    my $yomi = Unicode::Japanese->new($yomi_hira)->hira2kata->getu;
    $yomi = '' unless defined $yomi;
    # Drop trailing non-katakana (punctuation etc.) and map small kana to
    # their full-size forms so the last character can start a word.
    $yomi =~ s/[^\p{InKatakana}]+$//;
    $yomi =~ tr/ァィゥェォヵャュョッ/アイウエオカヤユヨツ/;

    # No usable reading: there is nothing to chain from, so concede.
    return unless length $yomi;

    my $tail = substr($yomi, -1, 1);
    return { result => 2 } if ($tail eq 'ン'); # win

    my $hdb = TokyoCabinet::HDB->new();
    die $hdb->errmsg($hdb->ecode) if (!$hdb->open($db, $hdb->OREADER));
    # Keys are stored as UTF-8 bytes by shiritori_mkdic(), so encode the
    # lookup key the same way instead of passing a character string.
    my $cands_json = $hdb->get(encode_utf8($tail));
    $hdb->close;
    return unless ($cands_json); # lose

    my $data = from_json decode_utf8 $cands_json;
    return unless ('ARRAY' eq ref $data);
    # An empty list would otherwise autovivify a bogus result below.
    return unless @{$data};

    my $result = $data->[ int(rand(scalar @{$data})) ];
    $result->{q_yomi} = $yomi;
    return $result;
}
# Build the word database from a dictionary source file.
#
# Arguments:
#   $db  - path of the Tokyo Cabinet hash database to create or update.
#   $dic - path of the dictionary source, read as EUC-JP text.
#
# Every line that carries a headword and a pronunciation is turned into an
# entry { surface, yomi, result }, where result is -1 when the pronunciation
# ends in 'ン' and 1 otherwise. Entries are grouped by the first character
# of the pronunciation; each group is stored as a JSON array under that
# character (UTF-8 encoded) as the key.
#
# Dies when the dictionary cannot be read, contains no usable entry, or the
# database cannot be opened, written or closed.
sub shiritori_mkdic {
    my ($db, $dic) = @_;

    # Read the source before creating the database: callers only check that
    # the DB file exists, so an empty DB left behind by a failed build would
    # be reused forever.
    open my $dic_fh, '<', $dic
        or die sprintf("open error: %s: %s\n", $dic, $!);

    my %hash;
    # Example input line (stored as EUC-JP in the file):
    # (品詞 (名詞 一般)) ((見出し語 (仕舞い 3999)) (読み シマイ) (発音 シマイ) )
    while (my $line = <$dic_fh>) {
        my ($surface, $yomi) = (decode('euc-jp', $line) =~ /\(見出し語 \((\S+) \d+\)\).*\(発音 ([^\)]+)\)/);
        next unless defined $yomi;

        my $head = substr $yomi, 0, 1;
        my $tail = substr $yomi, -1, 1;
        push @{ $hash{$head} }, +{
            surface => $surface,
            yomi    => $yomi,
            # Answering with a word that ends in 'ン' loses the game.
            result  => ($tail eq 'ン') ? -1 : 1,
        };
    }
    close $dic_fh;

    die sprintf("no dictionary entries found in %s\n", $dic) unless %hash;

    my $hdb = TokyoCabinet::HDB->new();
    if (!$hdb->open($db, $hdb->OWRITER | $hdb->OCREAT)) {
        # Continuing with an unopened handle cannot succeed, so stop here.
        die sprintf("open error: %s\n", $hdb->errmsg($hdb->ecode()));
    }

    for my $head (keys %hash) {
        # to_json returns a character string; store explicit UTF-8 bytes to
        # mirror the decode_utf8 done when the value is read back.
        my $value = encode_utf8(to_json($hash{$head}));
        next if $hdb->put(encode_utf8($head), $value);

        my $message = $hdb->errmsg($hdb->ecode());
        $hdb->close;
        die sprintf("put error: %s\n", $message);
    }

    if (!$hdb->close) {
        die sprintf("close error: %s\n", $hdb->errmsg($hdb->ecode()));
    }
    return;
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment