Skip to content

Instantly share code, notes, and snippets.

@Rybots
Created May 22, 2016 20:53
Show Gist options
  • Save Rybots/a6de93092c753108b5981e6bf2419007 to your computer and use it in GitHub Desktop.
Save Rybots/a6de93092c753108b5981e6bf2419007 to your computer and use it in GitHub Desktop.
require 'csv'
require 'json'
require 'nokogiri'
require 'open-uri'
require 'kconv'
require 'anemone'
# Upper bound on how many favorite-manga entries are read per user
# (indices 0..6 of "favorite_manga").
MAX_FAVORITES = 7

# Reads one JSON export and converts it into output rows.
#
# Every top-level entry is accessed as
#   entry["user"]["favorite_manga"][n]["author"]
# so entries without that shape contribute no rows. One row is produced per
# favorite whose "author" is present; reading a user's favorites stops at the
# first element that is not a Hash.
#
# @param path [String] path of the JSON file to read
# @return [Array<Array<String>>] rows of
#   [user_id, user_login, manga_id, manga_title, manga_author, mangapedia_url]
def favorite_manga_rows(path)
  # File.read closes the file handle once the content has been read.
  JSON.parse(File.read(path)).flat_map do |entry|
    user = entry.is_a?(Hash) ? entry["user"] : nil
    favorites = user.is_a?(Hash) ? user["favorite_manga"] : nil
    next [] unless favorites.is_a?(Array)

    readable = favorites.first(MAX_FAVORITES).take_while { |manga| manga.is_a?(Hash) }
    readable.reject { |manga| manga["author"].nil? }.map do |manga|
      [user["id"], user["login"],
       manga["id"], manga["title"], manga["author"], manga["mangapedia_url"]].map(&:to_s)
    end
  end
end

# Sort key for an id string: purely numeric ids are ordered by integer value
# (regardless of digit count) and come before non-numeric ids, which are
# ordered as plain text. Every key has the same shape, so keys never compare
# an Integer with a String.
#
# @param id [String]
# @return [Array] comparable key
def id_sort_key(id)
  id =~ /\A\d+\z/ ? [0, id.to_i, ""] : [1, 0, id]
end

# Rows from both exports, 1.json first.
$total = %w[1.json 2.json].flat_map { |path| favorite_manga_rows(path) }

# Sort once, after everything is loaded: by user id, then by manga id.
$total.sort_by! { |row| [id_sort_key(row[0]), id_sort_key(row[2])] }
# Title-page URLs (row column 5, the mangapedia_url) of at most the first
# seven rows. Array#first copes with fewer than seven rows, whereas indexing
# $total[n][5] directly raises NoMethodError on a missing row.
# NOTE(review): only these rows are crawled, yet every row is later paired
# with a crawl result by index — confirm the cap of seven is intentional.
urls = $total.first(7).map { |row| row[5].to_s }
# Fetch the author-page URL linked from each manga title page.
# NOTE(review): entries are appended as pages finish crawling, which may not
# match the order of `urls` — confirm before relying on index alignment.
author_urls = []
Anemone.crawl(urls, :depth_limit => 0) do |anemone|
  anemone.on_every_page do |page|
    # Presumably the body is nil when the download failed (verify against
    # Anemone::Page); skip such pages instead of crashing the whole crawl.
    next if page.body.nil?

    # Relabel the raw bytes as UTF-8; no transcoding is performed. A preceding
    # CP51932 label would be overwritten immediately, so it is not applied.
    # NOTE(review): if the site actually serves EUC-JP, this needs String#encode.
    html = page.body.force_encoding("UTF-8")
    Nokogiri::HTML.parse(html).xpath('//*[@id="S_Data"]/dl/dd[2]/a').each do |node|
      href = node[:href]
      # An anchor without an href cannot be turned into a URL.
      author_urls << "https://mangapedia.com#{href}" if href
    end
  end
end
# Collect the awards listed on each author page: one string per crawled page,
# with that page's awards joined by "," (empty string when none were found).
# NOTE(review): entries are appended as pages finish crawling, which may not
# match the order of `author_urls` — confirm before relying on index alignment.
awards = []
Anemone.crawl(author_urls, :depth_limit => 0) do |anemone|
  anemone.on_every_page do |page|
    body = page.body
    page_awards =
      if body.nil?
        # Presumably a failed download (verify against Anemone::Page).
        []
      else
        doc = Nokogiri::HTML.parse(body.force_encoding("UTF-8"))
        doc.xpath('//*[@itemprop="award"]').map do |node|
          # split/join removes every run of whitespace, newlines included.
          # Unlike gsub!, it never returns nil when there is nothing to strip.
          node.inner_text.split.join
        end
      end
    # Store the result so it can be merged into the rows later.
    awards << page_awards.join(",")
  end
end
p awards
# Append the author URL and the awards found at the same index to every row.
# Rows are extended in place, so $total sees the two extra columns as well;
# indices without a crawl result contribute nil.
# NOTE(review): pairing by index assumes the crawl results line up with the
# sorted rows — confirm, since the crawls do not obviously guarantee that.
$sum = $total.each_with_index.map do |row, index|
  row.push(author_urls[index], awards[index])
end
# $big_sum is an alias of $sum (same Array object).
$big_sum = $sum
# Write the header line followed by every row to result2.tsv.
# Columns are tab-separated, as the .tsv extension promises, and every field
# is quoted.
header = %w[
  user_id user_login manga_id manga_title manga_author
  mangapedia_title_url mangapedia_author_url awards_of_author
]
CSV.open('result2.tsv', 'w', encoding: "UTF-8", headers: true, col_sep: "\t", force_quotes: true) do |file|
  file << header
  $big_sum.each { |line| file << line }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment