Created
November 17, 2013 19:36
-
-
Save takuya/7517251 to your computer and use it in GitHub Desktop.
DoCoMoの請求書をPDF保存 ref: http://qiita.com/takuya_1st/items/29aaccd0c6364ffd6b22
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#coding: utf-8

# Compatibility shims for Ruby older than 1.9: treat source/strings as UTF-8
# and load RubyGems explicitly. Both lines are skipped on newer interpreters.
$KCODE = 'u' if RUBY_VERSION.to_f < 1.9
require 'rubygems' if RUBY_VERSION.to_f < 1.9
require 'pp'
require 'mechanize'
require 'nkf'
require 'kconv'

# Reopens Mechanize's HTTP agent so that every post-connect hook receives the
# response body IO object itself. The hook installed by
# BillScraper::MyDoCoMo#enable_force_encoding rewrites that IO in place.
# The IO is rewound after each hook, even when the hook raises.
class Mechanize::HTTP::Agent
  def post_connect uri, response, body_io # :yields: agent, uri, response, body
    @post_connect_hooks.each do |hook|
      begin
        hook.call self, uri, response, body_io
      ensure
        body_io.rewind
      end
    end
  end
end

module BillScraper
  # Scrapes the My docomo site with Mechanize and downloads the latest
  # billing summary ("gaiyo") and itemised statement ("meisai") as PDF.
  #
  # Typical use:
  #
  #   docomo = BillScraper::MyDoCoMo.new(id, password)
  #   docomo.login
  #   pdf = docomo.get_latest_pdf_download
  #
  # The PDF-returning methods answer a Hash with the String keys
  # "file_name", "body" and "month".
  class MyDoCoMo
    # Name of the single form the billing pages submit.
    BILLING_FORM_NAME = 'form1'.freeze

    # The underlying Mechanize agent.
    attr_accessor :m

    # id, password:: default credentials used by #login. They are stored per
    #                instance, so several accounts can be handled side by side.
    def initialize(id = nil, password = nil)
      @m = Mechanize.new
      @m.user_agent_alias = 'Windows IE 7'
      @id = id
      @password = password
      enable_force_encoding
    end

    # Installs the hook that converts every text/* response body to UTF-8.
    # Calling it repeatedly does not stack duplicate hooks.
    # Returns the agent's hook list.
    def enable_force_encoding
      hooks = @m.post_connect_hooks
      hooks << encoding_hook unless hooks.include?(encoding_hook)
      hooks
    end

    # Removes the UTF-8 conversion hook installed by #enable_force_encoding.
    # Hooks registered by other code on the same agent are left untouched.
    # Returns the agent's hook list.
    def disable_force_encoding
      hooks = @m.post_connect_hooks
      hooks.delete(encoding_hook)
      hooks
    end

    # Opens the home page, follows the login link and submits the credentials.
    # Arguments left nil fall back to the values given to #initialize.
    def login(id = nil, password = nil)
      id ||= @id
      password ||= @password
      @m.get 'http://www.mydocomo.com/web/home/'
      @m.page.links_with(:text => 'ログイン').first.click
      form = @m.page.forms[0]
      form.fields_with(:name => 'MDCM_UID').first.value = id
      form.fields_with(:name => 'MDCM_PWD').first.value = password
      form.submit
      go_through_refresh_page
    end

    # The site inserts interstitial pages that use <meta http-equiv="refresh">.
    # When the current page is one, follows its target and returns the body of
    # the page reached; otherwise returns nil without doing anything.
    def go_through_refresh_page
      refresh = @m.page.search("meta[http-equiv='refresh']")
      return unless refresh.size > 0
      # content looks like "0; url=/next/page": take the part after the last
      # ';' and read the first whitespace-free token following "url=".
      directive = refresh.attr('content').to_s.split(/;/).last
      redirect_to = directive[/\A\s*url=\s*(\S+)/i, 1]
      @m.get URI.join(@m.page.uri.to_s, redirect_to)
      @m.page.body
    end

    # Follows the "check your charges" link from the current page.
    def ryokin_page
      @m.page.links_with(:text => 'ご利用料金の確認').first.click
      go_through_refresh_page
    end

    # Switches the charges page to the combined-billing view by selecting the
    # first entry of the phone-number pull-down. Returns the resulting body.
    def ryokin_matome_page
      submit_billing_form('root_GKFAGW001SubmitHyoujiPull') do |form|
        form.fields_with(:name => 'root_GKFAGW001_DENWABANGOPULLDOWN').last.options[0].select
      end
      @m.page.body
    end

    # Opens the itemised statement of the most recent finalised month and
    # returns that month's label text as shown on the page.
    def latest_ryokin_meisai
      year_month = open_latest_statement('利用内訳', 'root_GKFAGW001SubmitShosaiUtiwake')
      @m.page.body = @m.page.body.toutf8
      year_month
    end

    # Opens the billing summary of the most recent finalised month and
    # returns that month's label text as shown on the page.
    def latest_ryokin_gaiyou
      open_latest_statement('お知らせ', 'root_GKFAGW001SubmitSeikyuGaiyou')
    end

    # Downloads the itemised statement PDF of the latest month.
    # Returns { "file_name" => String, "body" => String, "month" => String }.
    def get_latest_meisai_pdf
      go_home
      ryokin_page
      ryokin_matome_page
      y_m = latest_ryokin_meisai
      body = download_pdf('root_GKFAGW001SubmitPdf')
      { 'file_name' => "docomo-#{y_m}-meisai.pdf", 'body' => body, 'month' => y_m }
    end

    # Downloads the billing summary PDF of the latest month.
    # Returns { "file_name" => String, "body" => String, "month" => String }.
    def get_latest_gaiyo_pdf
      go_home
      ryokin_page
      ryokin_matome_page
      y_m = latest_ryokin_gaiyou
      body = download_pdf('root_GKFBGW001SubmitPdf')
      { 'file_name' => "docomo-#{y_m}-gaiyo.pdf", 'body' => body, 'month' => y_m }
    end

    # Downloads the latest summary and itemised statement and joins them into
    # one PDF with the external `pdftk` command.
    #
    # The three intermediate files are created in the current directory and
    # are always removed again, also when a step fails. Raises whatever
    # File.open raises (Errno::ENOENT) when pdftk produced no output file.
    #
    # Returns { "file_name" => String, "month" => String, "body" => String }.
    def get_latest_pdf_download
      gaiyo_path = meisai_path = out_path = nil

      gaiyo = get_latest_gaiyo_pdf
      gaiyo_path = gaiyo['file_name']
      write_binary(gaiyo_path, gaiyo['body'])

      meisai = get_latest_meisai_pdf
      meisai_path = meisai['file_name']
      write_binary(meisai_path, meisai['body'])

      y_m = meisai['month']
      out_path = "docomo-#{y_m}.pdf"
      # Argument-list form: no shell is involved, so the month text scraped
      # from the page cannot be interpreted as shell syntax.
      system('/usr/bin/env', 'pdftk', gaiyo_path, meisai_path, 'cat', 'output', out_path)
      body = File.open(out_path, 'rb') { |f| f.read }

      { 'file_name' => out_path, 'month' => y_m.to_s, 'body' => body }
    ensure
      [gaiyo_path, meisai_path, out_path].compact.each do |path|
        File.unlink(path) if File.exist?(path)
      end
    end

    # Resets navigation to the logged-in home page.
    def go_home
      @m.get 'https://www.mydocomo.com/dcm/dfw/web/pub3/r/header/home.html'
      go_through_refresh_page
    end

    private

    # The one lambda registered as post-connect hook; memoised so that
    # enable/disable can recognise it in the agent's hook list.
    def encoding_hook
      @encoding_hook ||= lambda { |_ua, _uri, res, body_io|
        force_utf8(body_io) if res['Content-Type'] =~ /^text\/.*$/
      }
    end

    # Replaces the contents of body_io with its UTF-8 conversion and rewrites
    # "shift-jis" charset mentions to "utf-8". The IO is emptied before
    # writing so no bytes of the old body survive when the new one is shorter.
    def force_utf8(body_io)
      converted = NKF.nkf('-wxm0', body_io.read)
      converted.gsub!(/shift-jis/i, 'utf-8')
      body_io.rewind
      body_io.truncate(0)
      body_io.write(converted)
    end

    # Submits the billing form of the current page after appending
    # "&<flag>=" to its action for every given flag. The form is yielded
    # first so the caller can fill in fields.
    def submit_billing_form(*flags)
      form = @m.page.forms_with(:name => BILLING_FORM_NAME).first
      yield form if block_given?
      flags.each { |flag| form.action = form.action + "&#{flag}=" }
      form.submit
    end

    # On the combined-billing page, finds the latest finalised month, reads
    # the month code from the onclick handler of the icon with the given alt
    # text and submits the form with the given flag. Returns the month label.
    def open_latest_statement(icon_alt, submit_flag)
      table = @m.page.search('table.new-amountclaimschedule').last
      year_month = table.search('tr th p.strong')[1].text
      onclick = table.search("tr img[alt='#{icon_alt}']").first['onclick']
      month_code = onclick[/setSeikyuNengtsu\('(\d+)'\)/, 1]
      submit_billing_form(submit_flag) do |form|
        form.fields_with(:name => 'root_GKFAGW001_HIBUTTONNENGETU').first.value = month_code
      end
      year_month
    end

    # Runs the three-step PDF download (request PDF, agree, download) from the
    # currently open statement page and returns the PDF body.
    #
    # Forced UTF-8 conversion is switched off meanwhile because the forms
    # carry half-width kana values that get garbled by the conversion; it is
    # switched back on afterwards even if a request fails.
    def download_pdf(pdf_flag)
      disable_force_encoding
      begin
        submit_billing_form(pdf_flag)
        submit_billing_form('root_GKHCGW001SubmitAgree')
        submit_billing_form('root_GKHCGW001SubmitAgree', 'BisAutoSubmissionREQUEST')
      ensure
        enable_force_encoding
      end
      @m.page.body
    end

    # Writes data to path in binary mode (PDF content must not be translated).
    def write_binary(path, data)
      File.open(path, 'wb') { |f| f.write(data) }
    end
  end
end
# Runs only when this file is executed directly, not when it is required.
# Uncomment and fill in real credentials to try the scraper.
if $0 == __FILE__
  # docomo = BillScraper::MyDoCoMo.new "0901234567","password"
  # docomo.login
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment