Skip to content

Instantly share code, notes, and snippets.

@odanado
Last active August 29, 2015 14:01
Show Gist options
  • Save odanado/22be1a769a5356c62f57 to your computer and use it in GitHub Desktop.
Save odanado/22be1a769a5356c62f57 to your computer and use it in GitHub Desktop.
serebii.net からデータを落としてくる
# -*- coding: utf-8 -*-
require "nokogiri"
require "open-uri"
require "sqlite3"
# Collects the base-stat rows found in a parsed Pokedex page.
#
# Every node matched by the "Base Stats" XPath below is treated as one row.
# For each row the text nodes of its <td> cells are read; the first text node
# is skipped (presumably the "Base Stats" label cell - confirm against the
# page markup) and the remaining ones are kept as strings.
#
# @param doc [#xpath] parsed page (a Nokogiri document in this script)
# @return [Array<Array<String>>] one array of cell texts per matched row,
#   in document order
def get_base_stats(doc)
  rows = doc.xpath("//*/text()[contains(.,'Base Stats')]/../..")
  rows.map do |row|
    cells = row.xpath("./td/text()")
    # Start at index 1 so the first text node is left out.
    (1...cells.length).map { |idx| cells[idx].to_s }
  end
end
# Builds the list of English names (base form plus alternate formes) for the
# Pokemon described by a parsed Pokedex page.
#
# The base name is the first whitespace-separated word of the page <title>.
# One name is produced per bold text node containing "Stats"; the third word
# of that heading decides the name:
#   * no third word          -> the base name itself
#   * third word has "Mega"  -> "Mega-<base>"
#   * anything else          -> "<base>-<third word>"
# Charizard is special-cased: "-X" and "-Y" are appended to the second and
# third entries.
#
# @param doc [#css, #xpath] parsed page (a Nokogiri document in this script)
# @return [Array<String>] names in the order the headings appear
def get_name(doc)
  title = doc.css('title').text.split(" ").first
  headings = doc.xpath("//*/b/text()[contains(.,'Stats')]")

  names = headings.map do |heading|
    forme = heading.to_s.split(" ")[2]
    if forme.nil?
      title
    elsif forme.include?("Mega")
      "Mega-#{title}"
    else
      "#{title}-#{forme}"
    end
  end

  if title == "Charizard"
    # The two Mega entries would otherwise share the same name.
    names[1] = names[1] + "-X"
    names[2] = names[2] + "-Y"
  end

  names
end
# Ordered [English fragment, Japanese replacement] pairs applied to every
# alternate-forme name after its base name has been swapped for the Japanese
# one. The pairs are applied one after another in this order.
FORME_LABELS_JA = [
  ["Mega-", "メガ"],
  ["-Small", "(小)"],
  ["-Large", "(大)"],
  ["-Super", "(特大)"],
  ["-Alternate", "FC"],
  ["-Attack", "(アタック)"],
  ["-Defense", "(デフェンス)"],
  ["-Speed", "(スピード)"],
  ["-Sandy", "(砂地)"],
  ["-Trash", "(ゴミ)"],
  ["-Zen", "(ダルマ)"],
  ["-Sky", "(スカイ)"],
  ["-Therian", "(霊獣)"],
  ["-Origin", "(オリジン)"],
  ["-Black", "(黒)"],
  ["-White", "(白)"],
  ["-Blade", "(矛)"],
  ["-Pirouette", "(ステップ)"],
  ["-Eternal", "(AZ)"],
  ["-X", "X"],
  ["-Y", "Y"]
].freeze

# Replaces decimal numeric character references ("&#12522;") with the
# character they denote, returned as UTF-8.
def decode_numeric_entities(text)
  text.gsub(/&#(\d+);/) { [Regexp.last_match(1).to_i].pack('U') }
end

# Looks up the raw text holding the Japanese name on the page.
#
# Starting from the 'fooinfo' cell whose text contains base_name, it walks to
# the bold "Japan" label and returns the third text node of that label's row
# as a string. Returns "" when that node is missing. If the lookup itself
# raises (for example an XPath made invalid by a quote in base_name), the
# error is reported on stderr and "" is returned, so the import carries on
# instead of dying later on a nil value.
def japanese_name_text(doc, base_name)
  cell = doc.xpath("//*/td[@class='fooinfo' and ./text()[contains(.,\"#{base_name}\")]]")
  label = cell.xpath("../td/table/tr/td/b[text()='Japan']")
  label.xpath("../../td/text()")[2].to_s
rescue StandardError => e
  warn "get_name_ja: Japanese name lookup failed for #{base_name.inspect}: #{e.inspect}"
  ""
end

# Translates the English names produced for one Pokemon into Japanese.
#
# The first entry of name is the base name; its Japanese counterpart is read
# from the page (see japanese_name_text) and entity-decoded. Every further
# entry is derived from the English forme name by substituting the Japanese
# base name for the English one (literal match, not a regex) and then
# applying FORME_LABELS_JA in order.
#
# @param doc [#xpath] parsed page (a Nokogiri document in this script)
# @param name [Array<String>] English names, base form first
# @return [Array<String>] Japanese names; index 0 is the base form and the
#   rest line up with name[1..]. When name is empty the result still holds
#   the single looked-up entry.
def get_name_ja(doc, name)
  base_name = name[0].to_s
  base_name_ja = decode_numeric_entities(japanese_name_text(doc, base_name))

  forme_names_ja = name.drop(1).map do |forme_name|
    # Block form keeps the replacement literal (no backslash expansion).
    with_base = forme_name.gsub(base_name) { base_name_ja }
    FORME_LABELS_JA.reduce(with_base) { |text, (english, japanese)| text.gsub(english, japanese) }
  end

  [base_name_ja, *forme_names_ja]
end
# Opens a remote page through open-uri.
#
# URI.open is used when open-uri provides it; on Rubies whose open-uri only
# patches Kernel#open, the plain open call is used instead.
def open_pokedex_page(uri)
  URI.respond_to?(:open) ? URI.open(uri) : open(uri)
end

# Downloads every Pokedex page from serebii.net and stores the scraped data
# in an SQLite database.
#
# Two tables are filled: base_stats (name plus six stat columns) and
# to_japanese (English name -> Japanese name). All inserts run inside one
# transaction, and an index on each lookup column is created afterwards.
# Values are passed as bind parameters so quotes inside names cannot break or
# alter the SQL; stats are converted to integers first so they are stored as
# numbers, as they were when written inline.
#
# NOTE: tables are created only if missing and rows are always appended, so
# running this twice against the same file duplicates the data.
#
# @param db_path [String] SQLite file to create or extend
# @param last_id [Integer] highest Pokedex number to fetch (pages 001..last_id)
# @raise [RuntimeError] when a page yields a name without a full stats row
# @raise [ArgumentError] when a stat cell is not a decimal integer
# @return [void]
def create_db(db_path = "pokemon.db", last_id = 721)
  db = SQLite3::Database.new(db_path)
  begin
    db.execute("create table if not exists base_stats(_id integer primary key autoincrement, name, hp, attack, defense, sp_atk, sp_def, speed );")
    db.execute("create table if not exists to_japanese(_id integer primary key autoincrement, name, japanese_name);")

    db.transaction do
      (1..last_id).each do |i|
        uri = "http://www.serebii.net/pokedex-xy/#{format('%03d', i)}.shtml"
        doc = Nokogiri::HTML(open_pokedex_page(uri))
        names = get_name(doc)
        base_stats = get_base_stats(doc)
        names_ja = get_name_ja(doc, names)

        names.each_with_index do |name, j|
          stats = base_stats[j]
          if stats.nil? || stats.length < 6
            raise "incomplete base stats for #{name.inspect} (page #{i}): #{stats.inspect}"
          end
          # Base 10 is forced so a leading zero is never read as octal.
          stat_values = stats.first(6).map { |stat| Integer(stat.to_s.strip, 10) }

          db.execute(
            "insert into base_stats (name,hp,attack,defense,sp_atk,sp_def,speed) " \
            "VALUES(?, ?, ?, ?, ?, ?, ?);",
            [name, *stat_values]
          )
          # to_s keeps a missing Japanese name as '' rather than NULL.
          db.execute(
            "insert into to_japanese (name,japanese_name) VALUES(?, ?);",
            [name, names_ja[j].to_s]
          )
        end

        next unless (i % 10).zero?

        puts "loading data...(#{i}/#{last_id})"
        # Move the cursor up one line so the next progress message overwrites this one.
        print "\e[1A"
      end
    end

    # "if not exists" lets the script run again on an existing database.
    db.execute("create index if not exists name_index on base_stats(name);")
    db.execute("create index if not exists japanese_name_index on to_japanese(japanese_name);")
  ensure
    # Close the handle even when a download, parse or insert fails.
    db.close
  end
end
# Script entry point: the import runs as soon as this file is loaded.
create_db
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment