Skip to content

Instantly share code, notes, and snippets.

@dai1741
Created March 8, 2013 05:08
Show Gist options
  • Save dai1741/5114332 to your computer and use it in GitHub Desktop.
Save dai1741/5114332 to your computer and use it in GitHub Desktop.
あるwebページを定期的に巡回し更新があればgitリポジトリにコミットするスクリプト 要PhantomJS・Python2・bash
# Render a web page in PhantomJS and save it as plain text and as HTML.
#
# Usage: phantomjs data-saver.coffee address [userName] [password]
#
# Output files (the watcher-output directory must already exist):
#   watcher-output/page.txt   - outerText of the <html> element
#   watcher-output/html.html  - outerHTML of the <html> element
#
# Exit status: 0 when both files were written; 1 on a usage error, a failed
# page load, or a failure while saving.
system = require 'system'
fs = require 'fs'

if system.args.length is 1
  console.log 'Usage: data-saver.coffee address [userName] [password]'
  phantom.exit 1
else
  phantom.outputEncoding = 'System' # for good old ms-dos
  console.log 'ブラウザ起動中...'
  page = require('webpage').create() # start the headless browser
  page.settings.loadImages = no # do not load images
  # Credentials for HTTP Basic authentication (undefined when not supplied)
  page.settings.userName = system.args[2]
  page.settings.password = system.args[3]
  address = system.args[1]

  page.onLoadStarted = ->
    console.log 'ページ読み込み開始'

  onLoadPage = (status) ->
    if status isnt 'success'
      console.log 'ページの読み込みに失敗'
      phantom.exit 1
      return
    console.log "読み込み完了"
    # Save the page in both text and HTML form.
    # An exception thrown inside this callback does not terminate PhantomJS
    # by itself, so without the try/catch a failed write would leave the
    # process (and whoever is waiting on it) hanging forever.
    try
      fs.write "watcher-output/page.txt", page.evaluate ->
        document.querySelector('html').outerText
      fs.write "watcher-output/html.html", page.evaluate ->
        document.querySelector('html').outerHTML
    catch error
      console.log "データの保存に失敗: #{error}"
      phantom.exit 1
      return
    phantom.exit()

  page.open address, onLoadPage
# Local settings for the watcher script; it reads the PageData section by name.
[PageData]
# Address of the page to poll (passed to data-saver.coffee as the first argument)
url=http://example.com/somepage.html
# Credentials handed to PhantomJS for HTTP Basic authentication
userName=someuser
password=somepasswd
# Number of seconds to sleep between polls
intervalSec=600
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Poll a web page and commit it to a git repository when its text changes.

Each cycle runs ``phantomjs data-saver.coffee`` to snapshot the page into
``watcher-output/`` and then commits the snapshot only when ``page.txt``
differs from the last commit; any other difference is thrown away.

Settings (url, userName, password, intervalSec) are read from the
``PageData`` section of ``local.ini``.  The code targets Python 2 and is
written so that it also runs unchanged on Python 3.
"""
import os
import subprocess
import time
from datetime import datetime

try:
    import ConfigParser as configparser  # Python 2 module name
except ImportError:
    import configparser  # Python 3 module name

CONFIG_FILE = 'local.ini'
SECTION = 'PageData'
OUTPUT_DIR = 'watcher-output'


def load_config(path=CONFIG_FILE):
    """Load the local settings (username, password, ...) from an ini file.

    A RawConfigParser is used on purpose: the interpolating parser treats
    ``%`` as special and fails on values such as a URL containing ``%20``.
    """
    conf = configparser.RawConfigParser()
    conf.read(path)
    return conf


def build_phantom_command(url, user_name, password):
    """Return the argument list that snapshots ``url`` with PhantomJS.

    Passing a list (instead of a shell string) hands every value to
    PhantomJS verbatim, so spaces, quotes or shell metacharacters in the
    URL or the credentials cannot break or inject into the command.
    """
    return ['phantomjs', 'data-saver.coffee', url, user_name, password]


def run_command(args, cwd=None):
    """Run ``args`` without a shell and return its exit status.

    Returns 127 when the program could not be started at all (for example
    the executable or ``cwd`` is missing), so callers can treat that like
    any other failure instead of crashing the polling loop.
    """
    try:
        return subprocess.call(args, cwd=cwd)
    except OSError as err:
        print('could not run %s: %s' % (args[0], err))
        return 127


def commit_snapshot(repo_dir=OUTPUT_DIR):
    """Commit the snapshot if page.txt changed, otherwise discard it.

    Returns True when git finished the chosen action successfully and
    False on any failure.
    """
    # Never run git outside the output repository: "add -A" / "reset --hard"
    # in the wrong directory would be destructive.
    if not os.path.isdir(repo_dir):
        print('output directory %s does not exist' % repo_dir)
        return False
    if run_command(['git', 'add', '-A'], cwd=repo_dir) != 0:
        return False
    # Exit status: 0 = page.txt unchanged, 1 = changed, anything else = error.
    diff_status = run_command(
        ['git', 'diff', '--cached', '--exit-code', '--quiet', '--',
         'page.txt'],
        cwd=repo_dir)
    if diff_status == 0:
        # Drop whatever else changed.  Resetting to HEAD (rather than
        # committing and then resetting to HEAD^) cannot remove an existing
        # commit when there was nothing to commit in the first place.
        return run_command(['git', 'reset', '--hard', 'HEAD'],
                           cwd=repo_dir) == 0
    if diff_status == 1:
        return run_command(['git', 'commit', '-m', 'Update (auto commit)'],
                           cwd=repo_dir) == 0
    return False


def main():
    """Poll the configured page forever, sleeping intervalSec between tries."""
    conf = load_config()
    command = build_phantom_command(conf.get(SECTION, 'url'),
                                    conf.get(SECTION, 'userName'),
                                    conf.get(SECTION, 'password'))
    interval_sec = conf.getint(SECTION, 'intervalSec')
    while True:
        if run_command(command) != 0:
            print('some error occurred while calling phantomjs on %s'
                  % datetime.today())
            print('waits for next try')
        elif commit_snapshot():
            print('successfully updated on %s' % datetime.today())
        else:
            print('some error occurred while calling git on %s'
                  % datetime.today())
            print('waits for next try')
        time.sleep(interval_sec)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment