Created
October 10, 2012 02:28
-
-
Save vexus2/3862809 to your computer and use it in GitHub Desktop.
[Ruby]ニコニコ動画検索APIを実行し動画情報を取り出す
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# -*- coding: utf-8 -*- | |
if RUBY_VERSION < '1.9' | |
require 'rubygems' | |
end | |
require File.expand_path('../../config/boot', __FILE__) | |
require 'json' | |
require 'mechanize' | |
require 'cgi' | |
require 'rexml/document' | |
require 'movie' | |
require 'tag' | |
class NicoVideoFetcher | |
def start | |
# 既にニコニコ動画経由で取得された動画の識別子(sm0000)一覧を取得 | |
@nicovideo_identifiers = Movie.get_identifiers(Settings.site_nicovideo) | |
# ニコニコ動画へログインを行う | |
cookie = login(Settings.nicovideo_id, Settings.nicovideo_pw) | |
tags = Tag.all | |
# タグ一覧を正規表現形式で保持 | |
@tags_regex = Regexp.union(tags.map { |v| v.tag }) | |
# DB上のタグのクエリを投げる | |
tags.each { |v| request(cookie, v) } | |
end | |
private | |
# ログインしてクッキー抽出 | |
def login(mail, pass) | |
host = 'secure.nicovideo.jp' | |
path = '/secure/login?site=niconico' | |
body = "mail=#{mail}&password=#{pass}" | |
https = Net::HTTP.new(host, 443) | |
https.use_ssl = true | |
https.verify_mode = OpenSSL::SSL::VERIFY_NONE | |
response = https.start { |https| | |
https.post(path, body) | |
} | |
cookie = '' | |
response['set-cookie'].split('; ').each do |st| | |
if idx=st.index('user_session_') | |
cookie = "user_session=#{st[idx..-1]}" | |
break | |
end | |
end | |
return cookie | |
end | |
private | |
def request(cookie, tag) | |
import_array = [] | |
Settings.max_paging_count.times do |page_num| | |
host = 'ext.nicovideo.jp' | |
# tagでの指定検索 | |
# キーワード検索にする場合はtagの箇所をsearchに変更する | |
path = "/api/search/tag/#{tag.tag}?mode=watch&order=d&page=#{page_num + 1}&sort=n" | |
response = Net::HTTP.new(host).start { |http| | |
request = Net::HTTP::Get.new(path) | |
request['cookie'] = cookie | |
http.request(request) | |
} | |
begin | |
parsed = JSON.parse(response.body) | |
rescue | |
# 連続でクエリを投げすぎた場合、エラーが返されるので | |
# ログに出力し一定時間スリープさせる | |
BatchLogger.error('Request Failed. response.body = ' + response.body) | |
sleep Settings.retry_sleep_seconds | |
next | |
end | |
return if parsed["list"].nil? | |
# 2012-10-08現在、以下の項目が取得可能 | |
# id(sm0000000) | |
# thumbnail_url | |
# length | |
# view_counter(再生数) | |
# num_res(コメント数) | |
# mylist_counter(マイリスト数) | |
parsed["list"].each do |v| | |
# 既に該当動画を登録している場合は登録を行わない | |
next if @nicovideo_identifiers.include?(v["id"]) | |
# 動画のタイトルに、タグに登録されている文言が含まれない場合は登録を行わない | |
next unless v["title"] =~ @tags_regex | |
movie = Movie.new | |
movie.identifier = v["id"] | |
movie.thumbnail_url = v["thumbnail_url"] | |
movie.length = v["length"] | |
movie.title = v["title"] | |
movie.view_counter = v["view_counter"] | |
movie.site = Settings.site_nicovideo | |
movie.tag_id = tag.id | |
movie.comment_counter = v["num_res"] | |
movie.mylist_counter = v["mylist_counter"] | |
import_array << movie | |
BatchLogger.debug v["title"] | |
end | |
Movie.import import_array | |
# APIクエリ毎に指定時間のスリープを行う | |
sleep Settings.retry_sleep_seconds | |
end | |
end | |
end | |
fetcher = NicoVideoFetcher.new | |
fetcher.start |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment