Last active
August 29, 2015 14:11
-
-
Save komasaru/2836000139834d054bcb to your computer and use it in GitHub Desktop.
Ruby script to check a difference between a saved-html and a current-html.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/ruby | |
# coding: utf-8 | |
#= Checking HTML difference
#
# date          name            version
# 2014.12.11    mk-mode         1.00 Initial version
#
# Copyright(C) 2014 mk-mode.com All Rights Reserved.
#---------------------------------------------------------------------------------
# Arguments : none
#---------------------------------------------------------------------------------
#++
require 'kconv' | |
require 'open-uri' | |
require 'openssl' | |
require 'timeout' | |
# Detects changes to a web page by comparing freshly fetched HTML with the
# HTML stored by the previous run.
#
# Every run writes the fetched HTML to +current.html+. When it differs from
# the content of +last.html+, +last.html+ is overwritten and an additional
# copy named +YYMMDD_HHMMSS.html+ is written to the same directory.
class CheckHtml
  # Class constants
  URL     = "http://www.********.com/********/".freeze  # <= URL of the page to check
  DIR     = "/path/to/data".freeze                      # <= directory where fetched HTML is stored
  FILE_L  = "#{DIR}/last.html".freeze                   # <= HTML fetched by the previous run
  FILE_C  = "#{DIR}/current.html".freeze                # <= HTML fetched by this run
  TIMEOUT = 15                                          # <= timeout (seconds)
  UA      = "mk-mode Bot (Ruby/#{RUBY_VERSION}, Administrator: ********@********.com)".freeze
                                                        # <= user agent

  # @param url [String] page to fetch (defaults to URL)
  # @param dir [String] directory holding last.html, current.html and the
  #   timestamped copies (defaults to DIR)
  # @param timeout [Numeric] overall fetch timeout in seconds (defaults to TIMEOUT)
  def initialize(url: URL, dir: DIR, timeout: TIMEOUT)
    @url     = url
    @dir     = dir
    @timeout = timeout
    @file_l  = "#{dir}/last.html"
    @file_c  = "#{dir}/current.html"
  end

  # Main procedure: read the previous HTML, fetch the current HTML, save it,
  # and, only when the two differ, update last.html and write a timestamped copy.
  #
  # Any StandardError is reported on $stderr (class, message, backtrace) and
  # the process exits with status 1.
  def exec
    read_html_last
    get_html_current
    save_html_current

    # Nothing more to do when the page is unchanged.
    return if @html_c == @html_l

    save_html_last
    save_html_timestamp
  rescue => e
    $stderr.puts "[#{e.class}] #{e.message}"
    Array(e.backtrace).each { |bt| $stderr.puts "\t#{bt}" }
    exit 1
  end

  private

  # Loads the HTML stored by the previous run into @html_l ("" when the file
  # does not exist yet).
  #
  # The file is read explicitly as UTF-8 so that it is comparable with the
  # UTF-8 string produced by get_html_current regardless of the process
  # locale. Exactly one trailing "\n" (the one write_html appends) is removed.
  def read_html_last
    @html_l =
      if File.exist?(@file_l)
        File.read(@file_l, encoding: Encoding::UTF_8).delete_suffix("\n")
      else
        ""
      end
  end

  # Fetches the page at @url into @html_c, chomped and converted to UTF-8.
  #
  # Raises whatever URI.open raises, or Timeout::Error when the fetch takes
  # longer than @timeout seconds.
  def get_html_current
    options = {
      "User-Agent"     => UA,
      # NOTE(review): certificate verification is disabled, as in the original
      # script. This makes HTTPS fetches vulnerable to interception; confirm it
      # is really required before relying on it.
      :ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE
    }
    @html_c = Timeout.timeout(@timeout) do
      URI.open(@url, options) { |f| f.read }.chomp.toutf8
    end
  end

  # Saves the fetched HTML as current.html.
  def save_html_current
    write_html(@file_c)
  end

  # Saves the fetched HTML as last.html, making it the baseline for the next run.
  def save_html_last
    write_html(@file_l)
  end

  # Saves the fetched HTML under a timestamped name (YYMMDD_HHMMSS.html).
  def save_html_timestamp
    write_html("#{@dir}/#{Time.now.strftime("%y%m%d_%H%M%S")}.html")
  end

  # Writes @html_c followed by a single "\n" to +path+ as UTF-8.
  # The newline is always appended so that read_html_last can strip exactly
  # one and recover the original string.
  def write_html(path)
    File.open(path, "w:UTF-8") { |f| f.write("#{@html_c}\n") }
  end
end
# Run the check only when this file is executed directly (not when required).
CheckHtml.new.exec if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment