Created
January 22, 2015 08:07
-
-
Save konoha81/9383fdb40c00ec5d51b8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Tag probability counter.
#
# Prompts for a base name on STDIN, reads "<name>.txt" (one record per line,
# fields separated by commas), counts how often each field value ("tag")
# occurs, and writes "<name>.prob" containing one "tag<TAB>probability" row
# per distinct tag, where probability = occurrences of the tag divided by
# the number of input lines.
use strict;
use warnings;
use utf8;
use Encode;

# The prompt below is a character string (the source is UTF-8 under
# "use utf8"), so give STDOUT an explicit encoding instead of relying on
# Perl's "Wide character" fallback.
binmode(STDOUT, ':encoding(UTF-8)');

print("読み込みたいテキストの名前を入力してください\n");
my $text = <STDIN>;
defined($text) or die "no input name was given on STDIN\n";
chomp($text);

my $in_path  = $text . '.txt';
my $out_path = $text . '.prob';

# Three-argument open: characters in the user-supplied name can never be
# interpreted as an open mode or a pipe.
open(my $in, '<', $in_path)
    or die "cannot open '$in_path' for reading: $!\n";

# Frequency table: tag => number of occurrences.
my %freq = ();
# Number of input lines read; this is the denominator of every probability.
# (The original comment calls it the total word count, so presumably the
# input holds one word per line -- confirm against the data.)
my $total = 0;

# Count the tags.
while (my $line = <$in>) {
    # Strip the line terminator (LF or CRLF) so that it neither becomes part
    # of the last tag nor forms a bogus tag of its own.
    $line =~ s/\r?\n\z//;

    # Each line is comma-separated. Every field is counted, including the
    # last one; split drops trailing empty fields, so a line that ends with
    # a comma does not produce an empty tag.
    for my $tag (split(/,/, $line)) {
        $freq{$tag}++;
    }
    $total++;
}
close($in);

print($total, "\n");

open(my $out, '>', $out_path)
    or die "cannot open '$out_path' for writing: $!\n";

# Sorted keys make the output reproducible between runs. %freq is non-empty
# only if at least one line was read, so $total is never zero here.
for my $tag (sort keys %freq) {
    my $prob = $freq{$tag} / $total;    # probability of occurrence of the tag
    print {$out} "$tag\t$prob\n";
}

# Buffered write errors surface at close time, so check it.
close($out) or die "cannot finish writing '$out_path': $!\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment