Skip to content

Instantly share code, notes, and snippets.

@sekia
Created November 19, 2011 22:47
Show Gist options
  • Save sekia/1379481 to your computer and use it in GitHub Desktop.
Save sekia/1379481 to your computer and use it in GitHub Desktop.
Scrapes the table of Japanese traditional colors on colordic.org and dumps the color name / RGB value mappings as YAML
#!/usr/bin/env perl
use 5.014;
use strict;
use warnings;
use Carp;
use Encode;
use Furl;
use Web::Scraper;
use YAML::Any;
# URL of the color table page: the first command-line argument if one was
# given, otherwise the default colordic.org address.
my $dic_url = shift(@ARGV) // 'http://www.colordic.org/w/';
# Returns a copy of the given string with leading and trailing whitespace
# removed. The `_` prototype means that $_ is used when no argument is
# supplied. The caller's string is left untouched (non-destructive s///r).
sub trim(_) {
    my ($text) = @_;
    return $text =~ s/^\s+|\s+$//gr;
}
# Converts an HTML color code into its red, green and blue components.
#
# Accepts "#rgb" or "#rrggbb" (hex digits in either case). The `_` prototype
# means that $_ is used when no argument is supplied. The three-digit form is
# expanded by doubling each digit, so "#fa0" is read as "#ffaa00".
#
# Returns a list of three integers (one per channel, each 0..255).
# Croaks with "Unknown format: ..." for anything that is not exactly one of
# the two accepted forms.
sub parse_html_color_code(_) {
    my ($code) = @_;

    # \A and \z (instead of ^ and $) so that a trailing newline is rejected;
    # with $ a value such as "#fff\n" passed validation and was then
    # mis-parsed because its length was neither 3 nor 6.
    my ($digits) = $code =~ /\A#([0-9a-f]{3}|[0-9a-f]{6})\z/i
        or croak "Unknown format: $code";

    # Plain if/else instead of the experimental given/when construct.
    my @channels;
    if (length($digits) == 3) {
        @channels = map { $_ x 2 } split //, $digits;
    }
    else {
        @channels = $digits =~ /([0-9a-f]{2})/gi;
    }

    return map { hex $_ } @channels;
}
# Scraper definitions, built from the innermost piece outwards.

# Text of the <span> inside a color link, stored under "reading".
my $reading_scraper = scraper {
    process 'span', 'reading' => 'TEXT';
};

# One table cell: the link's href and title attributes plus the nested
# reading structure produced by $reading_scraper.
my $cell_scraper = scraper {
    process 'a',
        'href'          => '@href',
        'name_and_code' => '@title',
        'reading'       => $reading_scraper;
};

# Whole page: every <td> under .colortable becomes one entry of the
# "colors" array.
my $color_list_scraper = scraper {
    process '.colortable td', 'colors[]' => $cell_scraper;
};
# Fetch the color table page. Stop on an HTTP failure instead of scraping an
# error page and silently emitting an empty or misleading mapping.
my $res = Furl->new->get($dic_url);
die sprintf("Failed to fetch %s: %s\n", $dic_url, $res->status_line)
    unless $res->is_success;

# NOTE(review): the response body is decoded as UTF-8 unconditionally;
# confirm that the page is actually served in that encoding.
my $scraped = $color_list_scraper->scrape(decode_utf8 $res->content);

# Build the mapping from (name, reading) to [R, G, B].
my %colors;
for my $color (@{ $scraped->{colors} }) {
    # A cell without a titled link yields no name_and_code; skip it rather
    # than letting an undefined value abort the whole run further down.
    next unless defined $color->{name_and_code};

    # The link title is split on whitespace into the name and the color code.
    my ($kanji_name, $html_code) = split /\s+/, trim($color->{name_and_code});
    my $reading = $color->{reading}{reading};

    # A comma-separated subscript is a single hash key made of both parts
    # joined with $; (the subscript separator, "\034" by default).
    # NOTE(review): confirm this composite key is the intended output shape.
    $colors{$kanji_name, $reading} = [ parse_html_color_code($html_code) ];
}

# Dump the mapping as YAML, encoded as UTF-8 bytes, to standard output.
say encode_utf8 Dump(\%colors);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment