Skip to content

Instantly share code, notes, and snippets.

@t-abe
Created October 24, 2010 11:10
Show Gist options
  • Save t-abe/643453 to your computer and use it in GitHub Desktop.
Save t-abe/643453 to your computer and use it in GitHub Desktop.
Perl script to look up words in weblio.jp's dictionary
#! /usr/bin/perl
use strict;
use warnings;
use Web::Scraper;
use LWP::UserAgent;
use URI;
use URI::Escape;
use Encode;
use utf8;
# Treat the standard streams as UTF-8 so non-ASCII dictionary text prints
# without "Wide character" warnings.
binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";

# The word to look up is the first command-line argument.  Test for
# definedness/length rather than truth so that an argument such as "0"
# is still accepted, and tell the user what went wrong otherwise.
my $word = shift;
die "usage: $0 WORD\n" unless defined $word and length $word;

# Command-line arguments arrive as bytes; turn them into characters.
$word = decode utf8 => $word;

my $items = dictor($word);

# Nothing found: retry once with a naive singular form.  Both patterns are
# anchored at the very end of the word (\z) so that only a trailing plural
# suffix is rewritten -- an unanchored s/ies/y/ would mangle words that
# merely contain "ies" (e.g. "diesels").  Skip the retry if stripping the
# suffix leaves nothing to look up.
if ( !$items
    and ( $word =~ s/ies\z/y/ or $word =~ s/s\z// )
    and length $word )
{
    dictor($word);
}
# dictor($word)
#
# Fetches http://ejje.weblio.jp/content/<word> and prints the entries found
# in the returned HTML to STDOUT.
#
# Output format: a header line "*<word>", then one line per matched
# <div class="levelN"> element (N = 0, 1 or 2) with its markup stripped,
# indented by N+1 tab characters.  A blank line separates consecutive
# level-0 entries.
#
# Parameters:
#   $word - the word to look up, as a character string.
#
# Returns the number of level-0 entries printed (0 when none were found or
# when the HTTP request did not succeed).
#
# Dies if the response carries no content at all.
sub dictor {
    my $word = shift;
    my $uri  = sprintf( "http://ejje.weblio.jp/content/%s",
        uri_escape_utf8($word) );

    my $ua       = LWP::UserAgent->new;
    my $response = $ua->get($uri);

    my $html = $response->decoded_content;
    die sprintf( "dictor: empty response from %s (%s)\n",
        $uri, $response->status_line )
        unless $html;

    printf( "*%s\n", $word );

    # A failed request still carries a body (an error page or an internal
    # LWP message).  Report the failure instead of silently scanning that
    # body as if it were a dictionary page; returning 0 lets the caller
    # treat it like "no entries".
    unless ( $response->is_success ) {
        warn sprintf( "dictor: GET %s failed: %s\n",
            $uri, $response->status_line );
        return 0;
    }

    my $items = 0;

    # Match each <div class="levelN"> (quotes optional), optionally starting
    # with a nested <div class=KnenjSub>...</div>, up to the next </div>.
    # NOTE(review): there is no /s modifier, so "." does not match "\n" and
    # a div whose content spans several lines is not matched -- confirm
    # against the site's current markup.
    while ( $html
        =~ m{<div class="?level(0|1|2)"?>((?:<div class=KnenjSub>.+?</div>)?.+?)</div>}g
        )
    {
        my $level = $1;
        my $text  = $2;

        $text =~ s/<.*?>//g;      # drop any remaining tags
        $text =~ s/[\r\n]//g;     # drop stray line-break characters

        # Only level-0 divs count as entries; every entry after the first
        # is preceded by a blank line.
        if ( $level == 0 ) {
            print "\n" if $items > 0;
            $items++;
        }

        print "\t" x ( $level + 1 );
        print $text, "\n";
    }

    return $items;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment