Skip to content

Instantly share code, notes, and snippets.

@martirsadota
Last active August 29, 2015 14:25
Show Gist options
  • Save martirsadota/e3ed2df26472ecfa8db5 to your computer and use it in GitHub Desktop.
Save martirsadota/e3ed2df26472ecfa8db5 to your computer and use it in GitHub Desktop.
Kancolle Wiki parser
#!/usr/bin/perl -X
#----------------------------------------------------------------------------------
# kancolleparser.pl
#----------------------------------------------------------------------------------
# Small utility script that fetches a shipgirl page and parses info from it
# Parse output is sent to stdout (as JSON)
# (You can also plug in other Wiki pages to it but don't expect anything)
#
# To use:
# kancolleparser.pl <shipgirl name (case-sensitive)>
#
# NOTE: executing it with no parameters will fetch info about my shipwaifu.
#
# NOTE #2: if it doesn't parse your shipwaifu's info, chances are that page
# is still stuck with the old infotable, which this parser cannot (and
# will not—stop complaining) handle. They will eventually™ roll out the
# new table format to all the shipgirl pages, so just be patient (or do it
# yourself).
#----------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
# The MIT License (MIT)
#----------------------------------------------------------------------------------
#
# Copyright (c) 2015 martirsadota
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#----------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
# CODE STARTS HERE
#----------------------------------------------------------------------------------
use strict;
use warnings;
use HTTP::Tiny;
use URI::Escape;
use JSON;
# Driver: fetch the wiki page for the ship named on the command line and print
# the parsed infobox data to stdout as pretty-printed, key-sorted JSON.
#
# The default is chosen BEFORE escaping and only when the argument is missing
# or empty, so a literal "0" argument is passed through instead of being
# silently replaced by the default.
my $ship_name = shift @ARGV;
$ship_name = "Bucky" unless defined $ship_name && length $ship_name;
my $term = uri_escape($ship_name);
my $blah = HTTP::Tiny->new->get("http://kancolle.wikia.com/wiki/$term");
if ($blah->{success}) {
    print to_json(Shallie($blah->{content}), {pretty => 1, canonical => 1});
} else {
    # HTTP::Tiny reports transport-level failures (DNS, refused connection,
    # timeout, ...) as status 599 with the generic reason "Internal Exception";
    # the real error text is in {content}. Real HTTP errors carry a useful
    # status/reason pair instead.
    my $why = $blah->{status} == 599
        ? $blah->{content}
        : "$blah->{status} $blah->{reason}";
    die "Unable to fetch page: $why";
}
sub Shallie {
    # Page-level parser for the Kancolle Wiki ShipInfoKai template.
    #
    # Takes the HTML of a whole wiki page, cuts out every infobox <table>
    # (one per ship form shown on the page) and hands each one to shallie().
    # Returns an array reference holding one parsed record per infobox, in
    # page order; the array is empty when the page has no matching table.
    my ($page_html) = @_;

    # A global match in list context returns every captured infobox at once.
    my @infoboxes = $page_html =~ m{
        (<table.class..typography.xl.optout.infobox..id....style..border..1px.solid.darkgray..width..320px..background.color...fafafa..float..none..>.*?<.table>)
    }isgcx;

    my @parsed;
    for my $infobox (@infoboxes) {
        push @parsed, shallie($infobox);
    }
    return \@parsed;
}
sub shallie {
    # Infobox-level parser for the Kancolle Wiki ShipInfoKai template.
    # This is the routine that does the actual scraping.
    #
    # Argument: the HTML of ONE infobox <table>.
    # Returns:  a hash reference. Keys that are always present:
    #             build_time    - "hh:mm:ss"/"mm:ss", "Unbuildable", or undef
    #             buildable     - { normal => bool, lsc => bool }
    #             is_remodel    - JSON boolean
    #             remodel_level - number or undef
    #             remodel_cost  - { ammo => number|undef, steel => number|undef }
    #           Keys present only when the corresponding markup is found:
    #             rarity, name, name_jp => { kanji, kana }, ship_num, class,
    #             stats (hash of numbers plus speed/range strings),
    #             stock_equip (array of four slot hashes).
    #
    # The input is held in a lexical instead of being assigned to the global
    # $_: assigning to $_ would overwrite the caller's $_ and, when the caller
    # is inside for/map/grep, write straight through the alias into the
    # caller's data.
    my ($html) = @_;
    my $info = {};

    if ($html =~ m{<td.colspan..6..style..position..relative..text-align..center..background-color.*?title..(?<rarity>.*?).>}i) {
        $info->{rarity} = $+{rarity};
    }

    if ($html =~ m{<strong.class..selflink.>(?<name>.*?)<.strong>}i) {
        $info->{name} = $+{name};
    }

    if ($html =~ m{<div.style..font-size.*?><b>No.(?:\s?|<span.title.*?help..>)(?<ship_num>\d+).*?
        <span.title..(?<name_jp_kana>.*?)..lang.*?help..>
        (?<name_jp_kanji>.*?)<.span>}ix) {
        $info->{name_jp}->{kanji} = $+{name_jp_kanji};
        $info->{name_jp}->{kana}  = $+{name_jp_kana};
        $info->{ship_num}         = $+{ship_num} + 0;
    }

    if ($html =~ m{<p><b>(?<class>.*?)</b></p>}i) {
        $info->{class} = $+{class};
    }

    # The stat grid is matched line by line ("." does not cross newlines here),
    # so this only succeeds when the table has exactly the expected row layout.
    if ($html =~ m{ .*HP\n
        .*?Maximum.after.marriage..(?<hp_max>\d+)..style..cursor:help..>(?<hp>\d+)</span></b>\n
        <.td.*Firepower\n
        </td.*b>(?<fp>\d+)\s\((?<fp_max>\d+)\)</b>\n
        .*\n
        .*\n
        .*Armor\n
        <.td.*b>(?<armor>\d+)\s\((?<armor_max>\d+)\)</b>\n
        <.td.*Torpedo\n
        <.td.*b>(?|(?<torp>\d+)|(?<torp>\d+)\s\((?<torp_max>\d+)\))</b>\n
        .*\n
        .*\n
        .*Evasion\n
        <.td.*b>(?<evade>\d+)\s\((?<evade_max>\d+)\)</b>\n
        .*AA\n
        <.td.*b>(?|(?<aa>\d+)|(?<aa>\d+)\s\((?<aa_max>\d+)\))</b>\n
        .*\n
        .*\n
        .*Aircraft\n
        <.td.*b>(?<plane>\d+)</b>\n
        .*ASW\n
        <.td.*b>(?|(?<asw>\d+)|(?<asw>\d+)\s\((?<asw_max>\d+)\))</b>\n
        .*\n
        .*\n
        .*Speed\n
        <.td.*b>(?<speed>.*?)</b>\n
        .*LOS\n
        <.td.*b>(?<los>\d+)\s\((?<los_max>\d+)\)</b>\n
        .*\n
        .*\n
        .*Range\n
        <.td.*b>(?<range>.*?)</b>\n
        .*Luck\n
        <.td.*b>(?<luck>\d+)\s\((?<luck_max>\d+)\)</b>\n
        }ix) {
        # Snapshot the named captures so the values do not depend on the
        # regex engine's "last successful match" state.
        my %stat = %+;
        my %stats;

        # Always-captured numeric fields; "+ 0" makes JSON emit numbers.
        for my $key (qw(hp hp_max fp fp_max armor armor_max torp
                        evade evade_max aa plane asw
                        los los_max luck luck_max)) {
            $stats{$key} = $stat{$key} + 0;
        }

        # Maxima that only exist when the cell reads "N (M)". A missing (or
        # zero) maximum is reported as undef, i.e. JSON null.
        for my $key (qw(torp_max aa_max asw_max)) {
            $stats{$key} = ( $stat{$key} // 0 ) + 0 || undef;
        }

        # Free-text fields are kept as strings.
        $stats{speed} = $stat{speed};
        $stats{range} = $stat{range};

        $info->{stats} = \%stats;
    }

    if ($html =~ m{
        <td.*?><b>Build.Time<.b>\n
        .*\n
        .*\n
        .*\n
        <td.*center..>(?:<span.*>)?(?|(?<build_time>(?:\d\d:)?\d\d:\d\d)|(?:.*<i>)?(?<build_time>Unbuildable)(?:<.i>)?)
        }ix) {
        $info->{build_time} = $+{build_time};
    }
    else {
        $info->{build_time} = undef;
    }

    # JSON::true / JSON::false are called with explicit parentheses so they
    # are unambiguous function calls rather than barewords.
    $info->{buildable}->{normal} =
        $html =~ m{<a.href...wiki.Category.Ships_buildable.*>Normal</a>}i
        ? JSON::true()
        : JSON::false();
    $info->{buildable}->{lsc} =
        $html =~ m{<a.href...wiki.Category.Ships_buildable.*>LSC</a>}i
        ? JSON::true()
        : JSON::false();

    if ($html =~ m{
        <td.*b>Remodel.Level<.b>\n
        .*\n
        .*\n
        .*\n
        <td.*?center..><b>Level\s(?<remodel_level>\d+)<.b>..<a.*?a>\s(?<remodel_ammo>\d+)\s<a.*?a>\s(?<remodel_steel>\d+)
        }ix) {
        $info->{is_remodel}    = JSON::true();
        $info->{remodel_level} = $+{remodel_level} + 0;
        $info->{remodel_cost}  = {
            ammo  => $+{remodel_ammo} + 0,
            steel => $+{remodel_steel} + 0,
        };
    }
    else {
        $info->{is_remodel}    = JSON::false();
        $info->{remodel_level} = undef;
        $info->{remodel_cost}  = {
            ammo  => undef,
            steel => undef,
        };
    }

    if ($html =~ m{
        <t.*>Stock.Equipment<.b>\n
        .*\n
        .*\n
        .*\n
        <td.*?>(?:\s*<a.*title.\"(?<slot_1_type>.*?)\"\s+>.*)?\n
        <.td><td.*?>\s(?|.*?<span.*common-translation.>(?<slot_1_name>.*?)<.span><.a>|<a.*>(?<slot_1_name>.*?)<.a>|(?<slot_1_name>.*?))\n
        <.td><td.*>\s(?<slot_1_planecap>.*?)\n
        .*\n
        .*\n
        <td.*?>(?:\s*<a.*title.\"(?<slot_2_type>.*?)\"\s+>.*)?\n
        <.td><td.*?>\s(?|.*?<span.*common-translation.>(?<slot_2_name>.*?)<.span><.a>|<a.*>(?<slot_2_name>.*?)<.a>|(?<slot_2_name>.*?))\n
        <.td><td.*>\s(?<slot_2_planecap>.*?)\n
        .*\n
        .*\n
        <td.*?>(?:\s*<a.*title.\"(?<slot_3_type>.*?)\"\s+>.*)?\n
        <.td><td.*?>\s(?|.*?<span.*common-translation.>(?<slot_3_name>.*?)<.span><.a>|<a.*>(?<slot_3_name>.*?)<.a>|(?<slot_3_name>.*?))\n
        <.td><td.*>\s(?<slot_3_planecap>.*?)\n
        .*\n
        .*\n
        <td.*?>(?:\s*<a.*title.\"(?<slot_4_type>.*?)\"\s+>.*)?\n
        <.td><td.*?>\s(?|.*?<span.*common-translation.>(?<slot_4_name>.*?)<.span><.a>|<a.*>(?<slot_4_name>.*?)<.a>|(?<slot_4_name>.*?))\n
        <.td><td.*>\s(?<slot_4_planecap>.*?)\n
        }ix) {
        # Copy the captures first: the /locked/ and /unequipped/ tests below
        # are themselves regex matches and would reset %+.
        my %equip = %+;
        my @slots;

        for my $slot_no (1 .. 4) {
            my $slot_name = $equip{"slot_${slot_no}_name"};
            my $plane_cap = $equip{"slot_${slot_no}_planecap"};

            push @slots, {
                name      => $slot_name,
                type      => $equip{"slot_${slot_no}_type"} || undef,
                # A "-" cell means the slot carries no aircraft.
                plane_cap => $plane_cap eq '-' ? undef : $plane_cap + 0,
                is_locked => $slot_name =~ /locked/i     ? JSON::true() : JSON::false(),
                is_empty  => $slot_name =~ /unequipped/i ? JSON::true() : JSON::false(),
            };
        }

        $info->{stock_equip} = \@slots;
    }

    return $info;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment