Skip to content

Instantly share code, notes, and snippets.

@ybenjo
Last active October 15, 2015 13:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ybenjo/9722293 to your computer and use it in GitHub Desktop.
Save ybenjo/9722293 to your computer and use it in GitHub Desktop.
Crawler for eventernote.com
# -*- coding: utf-8 -*-
require 'open-uri'
require 'nokogiri'
require 'logger'
require 'time'
require 'set'
require 'pp'
# Value sent as the 'User-Agent' request header by every page fetch in
# EventernoteCrawler. It is an empty string here; set it to an identifying
# string before crawling if desired.
UA = ''
# Scrapes event, participant and user pages from eventernote.com.
#
# Every public method logs what it does. If anything goes wrong while
# fetching or parsing a page (any StandardError), the error message is
# logged and an empty container ({} or []) is returned instead of raising.
class EventernoteCrawler
  BASE_URL = 'http://www.eventernote.com'

  # @param log_name [String] base name of the log file; the file is created
  #   next to this source file as "<log_name>.log"
  def initialize(log_name = 'crawl')
    @log = Logger.new("#{File.dirname(File.expand_path(__FILE__))}/#{log_name}.log")
  end

  # Fetches the detail page of one event.
  #
  # @param event_id [Integer, String] id used in the /events/<id> URL
  # @return [Hash] always contains :_id and :title; :date, :actors and
  #   :location_info are added when the matching table rows exist, and every
  #   other table row is stored under its own (String) label.
  #   Returns {} on any error.
  def get_event_info(event_id)
    @log.info("call get_event_info(#{event_id})")
    event_info = { _id: event_id }
    doc = fetch_doc("#{BASE_URL}/events/#{event_id}")
    event_info[:title] = (doc / 'div.gb_events_detail_title.clearfix.bg' / 'h2').inner_text

    (doc / 'table.table-striped.table-bordered' / 'tr').each do |row|
      cells = row / 'td'
      # Rows without a label/value pair of <td> cells (e.g. header rows)
      # carry no data; skip them instead of discarding the whole event.
      next if cells.size < 2

      label = cells[0].inner_text
      value = cells[1]
      case label
      when '開催日時' # date of the event
        event_info[:date] = (value / 'a').inner_text
      when '出演者' # performers
        event_info[:actors] = (value / 'li' / 'a').map do |link|
          href = link.attribute('href').value
          # The numeric actor id is the last path segment: /actors/<name>/<id>
          actor_id = href.scan(%r{actors/.*?/(\d{1,})}).first.first.to_i
          { name: link.inner_text, url: href, id: actor_id }
        end
      when '開催場所' # venue
        place_link = value / 'a'
        place_url = place_link.attribute('href').value
        place_id = place_url.scan(%r{/places/(\d{1,})}).first.first.to_i
        event_info[:location_info] = { location: place_link.inner_text, url: place_url, id: place_id }
      else
        event_info[label] = value.inner_text
      end
    end
    event_info
  rescue => e
    @log.error(e.message)
    {}
  end

  # Gets the user names of everybody registered for an event.
  #
  # @param event_id [Integer, String] id used in the /events/<id>/users URL
  # @return [Array<String>] participant names; [] on any error
  def get_event_participants(event_id)
    @log.info("call get_event_participants(#{event_id})")
    # A very large limit is requested so that the whole list arrives on page 1.
    url = "#{BASE_URL}/events/#{event_id}/users?event_id=#{event_id}&limit=10000000000&page=1"
    (fetch_doc(url) / 'ul.clearfix' / 'li' / 'p.name.pre').map(&:inner_text)
  rescue => e
    @log.error(e.message)
    []
  end

  # Fetches a user's profile page.
  #
  # @param user_name [String] name used in the /users/<name> URL
  # @return [Hash] :_id, :favorites (Array of {name:, url:} hashes),
  #   :number_of_following and :number_of_follower; {} on any error
  def get_user_info(user_name)
    @log.info("call get_user_info(#{user_name})")
    user_info = { _id: user_name }
    top_doc = fetch_doc("#{BASE_URL}/users/#{user_name}")

    user_info[:favorites] = (top_doc / 'ul.gb_actors_list.unstyled' / 'li' / 'a').map do |link|
      { name: link.inner_text, url: link.attribute('href').value }
    end

    # The first two links of the score table are read as the following and
    # follower counts, in that order.
    score_links = top_doc / 'div.gb_score_table' / 'a'
    user_info[:number_of_following] = score_links[0].inner_text.to_i
    user_info[:number_of_follower] = score_links[1].inner_text.to_i
    user_info
  rescue => e
    @log.error(e.message)
    {}
  end

  # Lists the users that +user_name+ follows.
  #
  # @param user_name [String]
  # @return [Array<String>] user names; [] on any error
  def get_user_following(user_name)
    @log.info("call get_user_following(#{user_name})")
    fetch_user_list(user_name, 'following')
  end

  # Lists the users that follow +user_name+.
  #
  # @param user_name [String]
  # @return [Array<String>] user names; [] on any error
  def get_user_follower(user_name)
    @log.info("call get_user_follower(#{user_name})")
    fetch_user_list(user_name, 'follower')
  end

  private

  # Downloads +url+ (sending UA as the User-Agent header) and parses it.
  #
  # URI.open is used on purpose: since Ruby 3.0, Kernel#open no longer opens
  # URLs through open-uri. The block form closes the handle after reading.
  def fetch_doc(url)
    @log.info("open #{url}")
    html = URI.open(url, 'User-Agent' => UA, &:read)
    Nokogiri::HTML(html)
  end

  # Shared implementation of the following/follower listings; +relation+ is
  # the last URL path segment ('following' or 'follower').
  def fetch_user_list(user_name, relation)
    doc = fetch_doc("#{BASE_URL}/users/#{user_name}/#{relation}")
    (doc / 'div.gb_users_list' / 'li' / 'a').map(&:inner_text)
  rescue => e
    @log.error(e.message)
    []
  end
end
# Demo driver: runs only when this file is executed directly, and pretty-prints
# the result of each crawler call. The commented blocks show sample output.
if $PROGRAM_NAME == __FILE__
  # The crawler itself
  client = EventernoteCrawler.new

  # Fetch the event information for event id 1
  sample_event = 1
  pp client.get_event_info(sample_event)
  #=>
  # {:_id=>1,
  #  :title=>
  #   "アニメロミックスpresents NANA MIZUKI LIVE GRACE 2013 -OPUSⅡ- supported by JOYSOUND Calbeeポテリッチ 1日目",
  #  :date=>"2013-01-19 (土)",
  #  "時間"=>"開場 16:30 開演 18:30 終演 21:30\n ※終演時間はあくまでも目安になります\n",
  #  :location_info=>{:location=>"さいたまスーパーアリーナ", :url=>"/places/2", :id=>2},
  #  :actors=>
  #   [{:name=>"水樹奈々",
  #     :url=>"/actors/%E6%B0%B4%E6%A8%B9%E5%A5%88%E3%80%85/28",
  #     :id=>28}],
  #  "関連リンク"=>"http://www.mizukinana.jp/special/livegrace2013_opus2/index.html",
  #  "概要"=> ...,
  #  "Twitterハッシュタグ"=>"#mizukinana"}

  # Fetch the list of participants of event id 1
  pp client.get_event_participants(sample_event)
  #=>
  # ["maeda___keiji",
  #  ...]

  # Fetch the profile of the user 'y_benjo'
  sample_user = 'y_benjo'
  pp client.get_user_info(sample_user)
  #=>
  # {:_id=>"y_benjo",
  #  :favorites=>
  #   [{:name=>"新谷良子", :url=>"/actors/%E6%96%B0%E8%B0%B7%E8%89%AF%E5%AD%90/875"},
  #    {:name=>"ZAQ", :url=>"/actors/ZAQ/3251"},
  #    {:name=>"三上枝織", :url=>"/actors/%E4%B8%89%E4%B8%8A%E6%9E%9D%E7%B9%94/2397"},
  #    {:name=>"nano.RIPE", :url=>"/actors/nano.RIPE/2972"},
  #    {:name=>"大久保瑠美",
  #     :url=>"/actors/%E5%A4%A7%E4%B9%85%E4%BF%9D%E7%91%A0%E7%BE%8E/2643"},
  #    {:name=>"佐藤利奈", :url=>"/actors/%E4%BD%90%E8%97%A4%E5%88%A9%E5%A5%88/1193"}],
  #  :number_of_following=>16,
  #  :number_of_follower=>1}

  # Fetch the list of users that 'y_benjo' follows
  pp client.get_user_following(sample_user)
  #=>
  # ["hetyo525",
  #  ...]

  # Fetch the list of users that follow 'y_benjo'
  pp client.get_user_follower(sample_user)
  #=>
  # ["rasiel9713"]
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment