Last active
April 7, 2018 19:22
-
-
Save etoyoda/f5d52752bc8fb3ed79162377c73beb10 to your computer and use it in GitHub Desktop.
Ruby script to download real-time weather (wx) data from the WIS GISC Tokyo website, using GDBM to remember what has already been fetched
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
require 'net/http' | |
require 'openssl' | |
require 'uri' | |
require 'gdbm' | |
require 'syslog' | |
# Small HTTP(S) client that keeps a single persistent Net::HTTP session,
# remembers the most recent response, and tallies response codes.
#
# Diagnostic output goes to STDERR when $VERBOSE is set. The globals $logger
# (an object responding to #err and #info) and $onset (a Time) are used for
# logging when they are set; they are expected to be assigned by the caller.
class WGet
  def initialize
    @conn = nil       # current Net::HTTP session, if any
    @resp = nil       # last response returned by #get
    @ca = nil         # path of a CA bundle, or nil to skip verification
    @n = Hash.new(0)  # response code (String) => number of occurrences
  end

  # Path of the CA certificate file used to verify TLS peers.
  attr_writer :ca

  # Ensures there is a session to uri's host and port.
  #
  # Returns 0 when the existing session already targets that host/port;
  # otherwise returns the result of Net::HTTP#start on a new session.
  # Net::OpenTimeout is logged (when $logger is set) and re-raised.
  def connect(uri)
    if @conn
      return 0 if @conn.address == uri.host && @conn.port == uri.port
      # Net::HTTP#finish raises IOError on a session that never started,
      # e.g. when a previous #start attempt failed.
      @conn.finish if @conn.started?
    end
    STDERR.puts "#CONNECT #{uri.host}:#{uri.port}" if $VERBOSE
    @conn = Net::HTTP.new(uri.host, uri.port, :ENV)
    # TLS stays on for everything except an explicit plain-http URL, so
    # existing https feeds behave exactly as before.
    @conn.use_ssl = (uri.scheme != 'http')
    if @conn.use_ssl?
      if @ca
        @conn.ca_file = @ca
      else
        # NOTE(review): without a CA file the peer certificate is not verified.
        @conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end
    end
    begin
      @conn.start
    rescue Net::OpenTimeout
      $logger.err('Net::OpenTimeout') if $logger
      raise
    end
  end

  # Issues a GET for uri, optionally as a conditional request.
  #
  # lmt  - value for the if-modified-since header, or nil
  # etag - value for the If-None-Match header, or nil
  #
  # Returns the response code as a String (e.g. '200', '304').
  def get(uri, lmt = nil, etag = nil)
    connect(uri)
    hdr = {}
    path = uri.request_uri
    STDERR.puts "#GET #{path}" if $VERBOSE
    hdr['if-modified-since'] = lmt if lmt
    hdr['If-None-Match'] = etag if etag
    STDERR.puts "# #{hdr.inspect}" if $VERBOSE
    @resp = @conn.get2(path, hdr)
    STDERR.puts "#--> #{@resp.code}" if $VERBOSE
    rc = @resp.code
    @n[rc] += 1
    rc
  end

  # Body of the most recent response.
  def body
    @resp.body
  end

  # last-modified header of the most recent response (nil when absent).
  def lmt
    @resp['last-modified']
  end

  # etag header of the most recent response (nil when absent).
  def etag
    @resp['etag']
  end

  # Finishes the session if it is started and logs the elapsed time and the
  # per-code tally. Logging is skipped when $logger or $onset is unset.
  def close
    @conn.finish if @conn && @conn.started?
    return unless $logger && $onset
    $logger.info('elapsed %g wget %s', Time.now - $onset, @n.inspect)
  end
end
# Command-line driver: reads feed lists (one message URL per line), downloads
# every message not yet recorded in the log database, and stores each one as a
# file in the current directory.
#
# Arguments: rtdb logdb feedurl ...
#   rtdb    - GDBM file holding the Last-Modified / ETag values per feed
#   logdb   - GDBM file mapping downloaded message URLs to a timestamp
#   feedurl - feed URLs, interleaved with the options --match=REGEXP,
#             --reject=REGEXP, --ca=FILE and --chdir=DIR, which take effect
#             for the feeds that follow them
class SynDL
  # Prints the usage line and terminates with exit status 1.
  def help
    puts "#{$0} rtdb logdb feedurl ..."
    exit 1
  end

  # argv - argument array; the first two entries are shifted off it and the
  #        remainder is kept as the feed/option list.
  def initialize(argv)
    @rtdb = argv.shift
    @logdb = argv.shift
    @feeds = argv
    help if @feeds.empty?
    @wget = WGet.new
    @pfilter = {}
  end

  # Reads the stored validators for feed from the rtdb file.
  #
  # Returns [lmt, etag] (either may be nil), or nil when opening the database
  # raises Errno::ENOENT (presumably because it has not been created yet).
  def getlmt(feed)
    lmt = etag = nil
    GDBM.open(@rtdb, 0644, GDBM::NOLOCK | GDBM::READER) do |rtdb|
      lmt = rtdb["lmt/#{feed}"]
      etag = rtdb["etag/#{feed}"]
    end
    [lmt, etag]
  rescue Errno::ENOENT
    nil
  end

  # Stores the validators for feed in the rtdb file; nil values are not written.
  def setlmt(feed, lmt, etag)
    return unless lmt || etag
    GDBM.open(@rtdb, 0644, GDBM::WRCREAT) do |rtdb|
      rtdb["lmt/#{feed}"] = lmt if lmt
      rtdb["etag/#{feed}"] = etag if etag
    end
  end

  # Fetches one feed list and downloads each listed message that passes the
  # current filters and is not yet present in ldb.
  #
  # ldb  - hash-like log database (message URL => timestamp string)
  # feed - feed URL as a String
  #
  # Returns 0 when the server answers 304. Exits the process with the response
  # code as status on any other non-200 answer, and with status 16 on
  # Net::OpenTimeout.
  def getfeed(ldb, feed)
    lmt, etag = getlmt(feed)
    ufeed = URI.parse(feed)
    STDERR.puts "##{ufeed.inspect}" if $VERBOSE
    begin
      code = @wget.get(ufeed, lmt, etag)
      case code
      when '304'
        STDERR.puts "#unchanged" if $VERBOSE
        return 0
      when '200'
        # proceed with the freshly fetched list
      else
        exit "0#{code}".to_i
      end
    rescue Net::OpenTimeout
      exit 16
    end
    fbdy = @wget.body
    lmt2 = @wget.lmt
    etag2 = @wget.etag
    STDERR.puts "#ETag: #{etag2}" if $VERBOSE
    # Everything needed from the feed response is captured above, so @wget
    # can be reused for the individual messages.
    fbdy.each_line do |line|
      id = line.chomp
      next if id.empty? # a blank line is not a URL
      next unless wanted?(id)
      if ldb[id]
        STDERR.puts "#dup skip #{id}" if $VERBOSE
        next
      end
      fetch_message(ldb, id)
    end
    setlmt(feed, lmt2, etag2)
  end

  # Opens syslog and the log database, then walks the argument list in order,
  # applying options and fetching feeds. The HTTP session and syslog are
  # closed on the way out.
  def run
    $logger = Syslog.open
    $onset = Time.now
    GDBM.open(@logdb, 0644, GDBM::WRCREAT) do |ldb|
      @feeds.each do |feed|
        case feed
        when /\A--match=/
          set_filter(:match, Regexp.last_match.post_match)
        when /\A--reject=/
          set_filter(:reject, Regexp.last_match.post_match)
        when /\A--ca=/
          @wget.ca = Regexp.last_match.post_match
        when /\A--chdir=/
          Dir.chdir(Regexp.last_match.post_match)
        else
          STDERR.puts "getfeed #{feed}" if $VERBOSE
          getfeed(ldb, feed)
        end
      end
    end
  ensure
    @wget.close
    # $logger is still unset if Syslog.open itself failed.
    $logger.close if $logger
  end

  private

  # True when id passes the active --match / --reject filters.
  def wanted?(id)
    return false if @pfilter[:match] && @pfilter[:match] !~ id
    return false if @pfilter[:reject] && @pfilter[:reject] =~ id
    true
  end

  # Downloads one message and records it in ldb. A non-200 answer is logged
  # and skipped, so an error page is never saved as data and the id stays
  # absent from ldb.
  def fetch_message(ldb, id)
    code = @wget.get(URI.parse(id))
    unless code == '200'
      $logger.err('GET %s failed with %s', id, code) if $logger
      STDERR.puts "#skip #{id} (#{code})" if $VERBOSE
      return
    end
    body = @wget.body
    STDERR.puts "#size #{body.size}" if $VERBOSE
    # Keep only filename-safe characters of the URL's last path component.
    fnam = File.basename(id).gsub(/[^A-Za-z_0-9.]/, '_')
    File.open(fnam, 'wb') { |ofp| ofp.write body }
    ldb[id] = Time.now.utc.strftime('%Y-%m-%dT%H%M%SZ')
  end

  # Installs the regexp for the given filter kind, or removes the filter when
  # pattern is empty.
  def set_filter(kind, pattern)
    if pattern.empty?
      @pfilter.delete(kind)
    else
      @pfilter[kind] = Regexp.new(pattern)
    end
  end
end
# Script entry point: build the downloader from the command-line arguments
# and run it.
downloader = SynDL.new(ARGV)
downloader.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment