Skip to content

Instantly share code, notes, and snippets.

@nqounet
Created January 28, 2017 02:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nqounet/de4b437d7f29450cbf45e96a2c80d09b to your computer and use it in GitHub Desktop.
Save nqounet/de4b437d7f29450cbf45e96a2c80d09b to your computer and use it in GitHub Desktop.
#Perl鍋 #14(2017-01-26)
#!/usr/bin/env perl
use utf8;
use v5.16;
use strict;
use warnings;
use feature 'say';
use Data::Printer {deparse => 1, caller_info => 1};
use Web::Query;
use Text::Shirasu;
# Fetch a YAPC::Japan blog entry, tokenize the text of each entry body with
# Text::Shirasu, and print the surface form of every noun on its own line.
my $url = 'http://blog.yapcjapan.org/entry/2017/01/26/080000';

# wq() yields undef rather than an object when the page cannot be retrieved
# (per the Web::Query documentation), so verify the result before chaining.
# Otherwise a network/HTTP failure surfaces only as an opaque
# "Can't call method "find" on an undefined value".
my $page = wq($url);
if (!defined $page) {
    # Web::Query keeps the most recent HTTP response in this package variable.
    my $response = $Web::Query::RESPONSE;
    my $reason   = $response ? $response->status_line : 'no response available';
    die "Cannot get a resource from $url: $reason\n";
}

$page->find('div.entry-content')->each(
    sub {
        # Inside the each() callback, $_ is the currently matched element.
        my $text = $_->text;

        # The dictionary path is installation-specific; adjust it to match
        # the local MeCab ipadic location.
        my $ts = Text::Shirasu->new(dicdir => '/usr/local/lib/mecab/dic/ipadic');

        # Normalize the raw text before handing it to the parser.
        my $normalize = $ts->normalize($text);
        $ts->parse($normalize);

        # Restrict the parsed nodes to those whose type is 名詞 (noun).
        $ts->filter(type => [qw/名詞/]);

        for my $node (@{$ts->nodes}) {
            say $node->surface;
        }
    }
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment