Skip to content

Instantly share code, notes, and snippets.

@etoyoda
Last active April 7, 2018 19:22
Show Gist options
  • Save etoyoda/f5d52752bc8fb3ed79162377c73beb10 to your computer and use it in GitHub Desktop.
Save etoyoda/f5d52752bc8fb3ed79162377c73beb10 to your computer and use it in GitHub Desktop.
Ruby script to download real-time weather (wx) data from the WIS GISC Tokyo website, using GDBM
#!/usr/bin/ruby
require 'net/http'
require 'openssl'
require 'uri'
require 'gdbm'
require 'syslog'
# Small HTTP-over-TLS client that keeps a single connection open, reuses it
# while consecutive requests target the same host and port, and remembers the
# most recent response.
#
# Depends on two globals supplied by the caller: $logger (receives err/info
# messages) and $onset (a Time, used for the elapsed-time log line in #close).
class WGet
  # Path of a CA certificate file used to verify the server. While it is nil,
  # #connect disables certificate verification.
  attr_writer :ca

  def initialize
    @conn = nil
    @resp = nil
    @ca = nil
    # Response code (String) => number of responses received with that code.
    @n = Hash.new(0)
  end

  # Makes sure @conn points at uri's host and port.
  #
  # Returns 0 when the existing connection object already targets that
  # host/port; otherwise drops the old connection, opens a new one and returns
  # the result of Net::HTTP#start.
  # Logs and re-raises Net::OpenTimeout.
  def connect(uri)
    if @conn
      return 0 if @conn.address == uri.host && @conn.port == uri.port

      # Net::HTTP#finish raises IOError on a session that is not started
      # (for instance after #close), so only finish a live session.
      @conn.finish if @conn.started?
    end
    STDERR.puts "#CONNECT #{uri.host}:#{uri.port}" if $VERBOSE
    @conn = Net::HTTP.new(uri.host, uri.port, :ENV)
    # NOTE(review): TLS is switched on unconditionally, whatever uri.scheme is.
    @conn.use_ssl = true
    if @ca
      @conn.ca_file = @ca
    else
      # NOTE(review): without a CA file the server certificate is not verified.
      @conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    begin
      @conn.start
    rescue Net::OpenTimeout
      $logger.err('Net::OpenTimeout')
      raise
    end
  end

  # Issues a GET for uri, optionally conditional on a Last-Modified value
  # (lmt) and/or an entity tag (etag).
  #
  # Stores the response for #body, #lmt and #etag, counts its status code, and
  # returns that status code as a String (e.g. '200').
  def get(uri, lmt = nil, etag = nil)
    connect(uri)
    hdr = {}
    path = uri.request_uri
    STDERR.puts "#GET #{path}" if $VERBOSE
    hdr['if-modified-since'] = lmt if lmt
    hdr['If-None-Match'] = etag if etag
    STDERR.puts "# #{hdr.inspect}" if $VERBOSE
    @resp = @conn.get2(path, hdr)
    STDERR.puts "#--> #{@resp.code}" if $VERBOSE
    rc = @resp.code
    @n[rc] += 1
    rc
  end

  # Body of the most recent response.
  def body
    @resp.body
  end

  # Last-Modified header of the most recent response.
  def lmt
    @resp['last-modified']
  end

  # ETag header of the most recent response.
  def etag
    @resp['etag']
  end

  # Finishes the connection if one is open, then logs the time elapsed since
  # $onset together with the per-status-code response counts.
  def close
    @conn.finish if @conn && @conn.started?
    $logger.info('elapsed %g wget %s', Time.now - $onset, @n.inspect)
  end
end
# Command-line driver: for each feed URL, fetches the feed (a list of message
# URLs, one per line), downloads every message not yet recorded in the log
# database into the current directory, and remembers the feed's
# Last-Modified/ETag values so the next run can issue a conditional GET.
#
# Arguments (see #help): rtdb logdb feedurl ...
# The feed list may be interleaved with options, which take effect for the
# feeds that follow them:
#   --match=REGEX   only download message URLs matching REGEX (empty: clear)
#   --reject=REGEX  skip message URLs matching REGEX (empty: clear)
#   --ca=FILE       CA certificate file handed to WGet
#   --chdir=DIR     change the working (output) directory
class SynDL
  # Prints the usage line and exits with status 1.
  def help
    puts "#$0 rtdb logdb feedurl ..."
    exit 1
  end

  # argv: [rtdb path, logdb path, feed URLs and options...]. The array is
  # consumed in place; what remains after the two paths becomes the feed list.
  def initialize(argv)
    @rtdb = argv.shift
    @logdb = argv.shift
    @feeds = argv
    help if @feeds.empty?
    @wget = WGet.new
    @pfilter = {}
  end

  # Reads the stored Last-Modified and ETag values for feed from rtdb, opened
  # as a reader without locking.
  # Returns [lmt, etag] (either may be nil), or nil when rtdb does not exist.
  def getlmt(feed)
    lmt = etag = nil
    mode = GDBM::NOLOCK | GDBM::READER
    GDBM.open(@rtdb, 0644, mode) do |rtdb|
      lmt = rtdb["lmt/#{feed}"]
      etag = rtdb["etag/#{feed}"]
    end
    [lmt, etag]
  rescue Errno::ENOENT
    nil
  end

  # Stores whichever of lmt/etag is non-nil for feed in rtdb, creating the
  # database if needed. Does nothing when both are nil.
  def setlmt(feed, lmt, etag)
    return unless lmt || etag

    GDBM.open(@rtdb, 0644, GDBM::WRCREAT) do |rtdb|
      rtdb["lmt/#{feed}"] = lmt if lmt
      rtdb["etag/#{feed}"] = etag if etag
    end
  end

  # Processes one feed URL.
  #
  # ldb is the open log database (message URL => download timestamp) used to
  # skip messages already downloaded.
  # Returns 0 when the feed is unchanged (HTTP 304). Exits the process with the
  # numeric status code on any feed response other than 200/304, and with 16
  # on a connection open timeout.
  def getfeed(ldb, feed)
    lmt, etag = getlmt(feed)
    ufeed = URI.parse(feed)
    STDERR.puts "##{ufeed.inspect}" if $VERBOSE
    begin
      code = @wget.get(ufeed, lmt, etag)
      case code
      when '304'
        STDERR.puts "#unchanged" if $VERBOSE
        return 0
      when '200'
        :do_nothing
      else
        exit "0#{code}".to_i
      end
    rescue Net::OpenTimeout
      exit 16
    end
    # Capture everything needed from the feed response now: @wget only keeps
    # the latest response and is reused for the message downloads below.
    fbdy = @wget.body
    lmt2 = @wget.lmt
    etag2 = @wget.etag
    STDERR.puts "#ETag: #{etag2}" if $VERBOSE
    failed = 0
    fbdy.each_line do |line|
      id = line.chomp
      # A blank line is not a message URL.
      next if id.empty?
      next if @pfilter[:match] && @pfilter[:match] !~ id
      next if @pfilter[:reject] && @pfilter[:reject] =~ id

      if ldb[id]
        STDERR.puts "#dup skip #{id}" if $VERBOSE
        next
      end
      failed += 1 unless fetch_message(ldb, id)
    end
    # Only advance the conditional-GET markers when every message was saved;
    # otherwise the next run would get 304 and never retry the failed ones.
    setlmt(feed, lmt2, etag2) if failed.zero?
  end

  # Entry point: opens syslog and the log database, then walks the argument
  # list in order, applying options and fetching feeds. Always closes the HTTP
  # connection and the logger on the way out.
  def run
    $logger = Syslog.open
    $onset = Time.now
    GDBM.open(@logdb, 0644, GDBM::WRCREAT) {|ldb|
      @feeds.each {|feed|
        case feed
        when /^--match=/
          # $' is the text after the matched option prefix.
          if $'.empty? then @pfilter.delete(:match)
          else @pfilter[:match] = Regexp.new($')
          end
        when /^--reject=/
          if $'.empty? then @pfilter.delete(:reject)
          else @pfilter[:reject] = Regexp.new($')
          end
        when /^--ca=/
          @wget.ca = $'
        when /^--chdir=/
          Dir.chdir($')
        else
          STDERR.puts "getfeed #{feed}" if $VERBOSE
          getfeed(ldb, feed)
        end
      }
    }
  ensure
    @wget.close
    $logger.close
  end

  private

  # Downloads the message at URL id into the current directory (file name is
  # the URL basename with unsafe characters replaced by '_') and records the
  # download time in ldb.
  # Returns true when saved; false when the server answered anything but 200,
  # in which case nothing is written or recorded so the message is retried.
  def fetch_message(ldb, id)
    code = @wget.get(URI.parse(id))
    unless code == '200'
      $logger.err('HTTP %s for %s', code, id)
      return false
    end
    body = @wget.body
    STDERR.puts "#size #{body.size}" if $VERBOSE
    fnam = File.basename(id).gsub(/[^A-Za-z_0-9.]/, '_')
    File.open(fnam, 'wb') { |ofp| ofp.write body }
    ldb[id] = Time.now.utc.strftime('%Y-%m-%dT%H%M%SZ')
    true
  end
end
# Script entry point: build the downloader from the command-line arguments
# (rtdb, logdb, then feed URLs/options) and run it.
SynDL.new(ARGV).run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment