Created
September 25, 2018 11:16
-
-
Save koshian/38607ef491e2b817d6ab1095d9f35d19 to your computer and use it in GitHub Desktop.
日本雑誌協会の印刷部数公表データからコミック誌の部数を拾ってきてCSV出力するスクリプトです。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# coding: utf-8 | |
require 'nokogiri' | |
require 'open-uri' | |
require 'csv' | |
# Genre codes substituted into the listing URL, one page per genre.
genre_list = [
  14, # comic magazines for boys
  15, # comic magazines for men
  29, # comic magazines for girls
  30, # comic magazines for women
]

# Listing page URL; the two %d slots are filled with [period, genre].
uri_template = 'https://www.j-magazine.or.jp/user/printed/index/%d/%d'

# periods: period code (array index) => label taken from the page's period selector.
periods = []

# result: one hash per scraped magazine row (:period, :title, :link, :count).
result = []
# Fetch the listing page for every period code (1 through 41) and every genre
# in genre_list, appending one record per magazine link to `result`.
#
# Side effects:
#   - fills `periods` (period code => label) from the first page fetched
#   - appends {period:, title:, link:, count:} hashes to `result`
(1..41).each do |period|
  genre_list.each do |genre|
    # URI.open (open-uri) is used instead of Kernel#open, which no longer
    # accepts URLs as of Ruby 3.0. The block form closes the handle once
    # the document has been parsed.
    doc = URI.open(format(uri_template, period, genre)) { |io| Nokogiri::HTML(io) }

    # The period selector is read only once: every page is expected to carry
    # the same option list, so the first fetched page is enough.
    if periods.empty?
      doc.search('#period_cd_top option').each do |option|
        periods[option.attribute('value').value.to_i] = option.inner_html
      end
    end

    doc.search('#content table tr').each do |tr|
      # Only rows containing a magazine-name link produce records; the count
      # is taken from the first count cell of that same row.
      tr.search('td.MagDataTab_MagNa a').each do |anchor|
        result.push(
          period: period,
          title: anchor.inner_html,
          link: anchor.attribute('href').value,
          count: tr.search('td.MagDataTab_count')[0].inner_html
        )
      end
    end
  end
end
# Pivot the flat scrape records into a table and print it as CSV on stdout.
#
# Layout: row 0 is the header (first cell '期間', then one column per magazine
# title in order of first appearance); every other row is indexed by period
# code and holds the period label in column 0 followed by the counts.
box = []
titles = ['期間']
# title => column index, so each record is placed without rescanning `titles`.
column_of = {}

result.each do |r|
  column = column_of[r[:title]]
  unless column
    titles.push(r[:title])
    column = column_of[r[:title]] = titles.size - 1
  end

  row = (box[r[:period]] ||= [])
  row[column] = r[:count]
  # Label the row with the period name the first time a label is available.
  row[0] ||= periods[r[:period].to_i]
end

box[0] = titles

box.each do |row|
  # `box` is sparse: a period code with no scraped rows leaves a nil slot,
  # and nil has no to_csv, so such gaps are skipped instead of crashing.
  next unless row

  print row.to_csv
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment