Skip to content

Instantly share code, notes, and snippets.

@motemen
Created June 27, 2011 10:48
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save motemen/1048663 to your computer and use it in GitHub Desktop.
Save motemen/1048663 to your computer and use it in GitHub Desktop.
Highlight text by MeCab segmentation
use strict;
use warnings;
use opts;
use Text::MeCab;
use Encode;
use Encode::Locale;
use Term::ANSIColor;
# Palette of Term::ANSIColor color names.  Patterns are assigned colors by
# position, wrapping around when there are more patterns than colors.
my @COLORS = ('yellow', 'red', 'blue', 'green', 'cyan', 'magenta');
# Declare the boolean command-line option backing $legend.
# NOTE(review): presumably `opts` derives the flag name (--legend) from the
# variable name and removes parsed options from @ARGV -- confirm against the
# opts documentation.  Do not rename $legend or reorder these statements
# without checking that.
opts my $legend => 'Bool';
# Decode the command-line arguments in place (Encode::Locale), so the
# patterns below are character strings rather than raw bytes.
Encode::Locale::decode_argv;
# Decode STDIN and encode STDOUT using the console encodings that
# Encode::Locale registers under the names console_in / console_out.
binmode(STDIN, ':encoding(console_in)');
binmode(STDOUT, ':encoding(console_out)');
# Every argument left in @ARGV is a pattern; each one is later matched as a
# regular expression against MeCab feature strings.
my @patterns = @ARGV;
# When the legend flag is set, print a key line before any input is
# processed: each pattern rendered in the color assigned to it by position,
# each followed by a single space, then a newline.
if ($legend) {
    for my $index (0 .. $#patterns) {
        my $shade = $COLORS[ $index % @COLORS ];
        print color($shade), $patterns[$index], color('reset'), ' ';
    }
    print "\n";
}
# Main filter: segment every line read from STDIN with MeCab and echo it
# back, wrapping each morpheme whose feature string matches one of the
# command-line patterns in that pattern's color.
my $mecab = Text::MeCab->new;
my $enc   = Text::MeCab::ENCODING;

# Compile every pattern exactly once, paired with the color it gets by
# position, instead of recompiling the pattern string for each morpheme.
#  * An invalid pattern now dies here, before any line is processed, rather
#    than part-way through the output.
#  * Wrapping in (?:...) keeps the compiled pattern non-empty, so an empty
#    pattern string matches everything instead of triggering Perl's
#    "empty pattern reuses the last successfully matched regex" rule.
my @highlighters = map {
    my $pattern = $patterns[$_];
    +{ regex => qr/(?:$pattern)/, color => $COLORS[ $_ % @COLORS ] };
} 0 .. $#patterns;

while (my $line = <STDIN>) {
    chomp $line;

    # MeCab works on bytes in its dictionary encoding, so encode on the way
    # in and decode each piece on the way out.
    # NOTE(review): the encoded bytes are held in a lexical for the whole
    # node walk in case nodes reference the input buffer -- confirm against
    # the Text::MeCab documentation.
    my $bytes = encode($enc => $line);

    # MeCab returns a linked list of nodes; walk it until the list ends or
    # a node has no surface.
    for (my $node = $mecab->parse($bytes); $node && defined $node->surface; $node = $node->next) {
        my $feature = decode($enc => $node->feature);

        # The first pattern that matches decides the color.
        my $color;
        for my $highlighter (@highlighters) {
            next unless $feature =~ $highlighter->{regex};
            $color = $highlighter->{color};
            last;
        }

        print color($color) if $color;
        print decode($enc => $node->surface);
        print color('reset') if $color;
    }

    # The newline removed by chomp is restored here; morphemes are printed
    # back to back with nothing between them.
    print "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment