Skip to content

Instantly share code, notes, and snippets.

@handygeospatial
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save handygeospatial/f5857107f3fd2ac5027b to your computer and use it in GitHub Desktop.
Save handygeospatial/f5857107f3fd2ac5027b to your computer and use it in GitHub Desktop.
SizedQueue を使った無駄撃ちのない地理院タイルダウンローダ(Ruby) ref: http://qiita.com/handygeospatial/items/5baaf9d0ae1c817420bd
require 'open-uri'
require 'digest/md5'
require 'fileutils'
require 'zlib'
require 'thread'
# --- Settings -------------------------------------------------------------
# Only entries whose first path component (as an integer) lies in this range
# are downloaded; presumably this is the tile zoom level -- confirm.
Z_EXTENT = 18..18
# Tile set identifier interpolated into the download URL.
T = 'std'
# Number of worker threads started for downloading.
N_THREADS = 8
# Capacity of the job queue; the producer blocks while the queue is full.
Q_SIZE = 200
# Seconds the watcher sleeps between two progress reports.
WAIT = 5

# --- Shared state ---------------------------------------------------------
# One slot per worker thread, filled in when the workers are started.
$threads = [nil] * N_THREADS
# Running counters plus the path most recently read from the listing.
$status = { skip: 0, ok: 0, ng: 0, path: nil }
# Bounded queue handing jobs from the listing reader to the workers.
$q = SizedQueue.new(Q_SIZE)
# Start one download worker per slot in $threads.
#
# Each worker pops jobs from $q until it receives nil (the shutdown sentinel
# pushed by the producer). A job is a Hash with:
#   :url  - address to download
#   :md5  - expected MD5 hex digest of the downloaded body
#   :path - local file the body is stored in
#
# Outcome per job:
#   * digest matches   -> body is written to :path, $status[:ok] is bumped
#   * digest mismatch  -> any existing file at :path is removed, $status[:ng]
#                         is bumped
#   * any StandardError (HTTP error, timeout, IO failure) -> reported on
#     stderr and counted as :ng; the worker keeps running. Without this a
#     single failed request would kill the thread, and once every worker has
#     died the producer would block forever on the full queue.
$threads.size.times do |i|
  $threads[i] = Thread.new do
    while (job = $q.pop)
      begin
        # URI#read is provided by open-uri and works on old and new Rubies;
        # Kernel#open no longer accepts URLs as of Ruby 3.0.
        body = URI.parse(job[:url]).read
        if Digest::MD5.hexdigest(body) == job[:md5]
          # mkdir_p is a no-op for existing directories, so no pre-check.
          FileUtils.mkdir_p(File.dirname(job[:path]))
          # Binary mode + write keeps the payload byte-for-byte identical.
          File.open(job[:path], 'wb') { |f| f.write(body) }
          $status[:ok] += 1
        else
          FileUtils.rm(job[:path]) if File.exist?(job[:path])
          $status[:ng] += 1
        end
      rescue StandardError => e
        $status[:ng] += 1
        warn "#{job[:path]}: #{e.class}: #{e.message}"
      end
    end
  end
end
# Progress reporter: while at least one worker thread is alive, sleep WAIT
# seconds and then print one line of the form
#   HH:MM:SS <last listed path> <queue length> [skip, ok, ng per second]/s [skip, ok, ng totals]
# Rates are the counter deltas over the sleep, integer-divided by WAIT.
watcher = Thread.new do
  counters = %w{skip ok ng}.map { |name| name.to_sym }
  while $threads.any? { |worker| worker.alive? }
    # Snapshot taken before sleeping so the deltas cover exactly one interval.
    before = $status.clone
    sleep WAIT
    clock = Time.now.iso8601[11..18]
    current_path = $status[:path]
    backlog = $q.size
    rates = counters.map { |key| ($status[key] - before[key]) / WAIT }
    totals = counters.map { |key| $status[key] }
    print "#{clock} #{current_path} #{backlog} #{rates}/s #{totals}\n"
  end
end
# Producer: read the gzipped listing line by line and queue one download job
# per tile that still needs fetching, then shut everything down.
#
# Each listing line is split on ',' into path, date, size and md5; only path
# and md5 are used. A line is skipped (and counted in $status[:skip]) when
#   * it is blank (no path),
#   * the integer value of its first path component is outside Z_EXTENT, or
#   * a local file already exists at that path with a matching MD5.
#
# The block form of GzipReader.open closes the reader once the listing has
# been consumed (the original left it open).
Zlib::GzipReader.open('mokuroku.csv.gz') do |gz|
  gz.each_line do |l|
    path, _date, _size, md5 = l.strip.split(',')
    $status[:path] = path
    if path.nil? ||
       !Z_EXTENT.include?(path.split('/')[0].to_i) ||
       (File.exist?(path) && Digest::MD5.file(path).hexdigest == md5)
      $status[:skip] += 1
      next
    end
    url = "http://cyberjapandata.gsi.go.jp/xyz/#{T}/#{path}"
    # Blocks while the queue is full, throttling the reader to the workers.
    $q.push({:url => url, :md5 => md5, :path => path})
  end
end
# One nil per worker: each worker's pop loop ends when it receives nil.
$threads.size.times { $q.push(nil) }
$threads.each { |t| t.join }
watcher.join
require 'open-uri'
require 'digest/md5'
require 'fileutils'
require 'zlib'
require 'thread'
# --- Settings -------------------------------------------------------------
# Only entries whose first path component (as an integer) lies in this range
# are downloaded; presumably this is the tile zoom level -- confirm.
Z_EXTENT = 18..18
# Tile set identifier interpolated into the download URL.
T = 'std'
# Number of worker threads started for downloading.
N_THREADS = 8
# Capacity of the job queue; the producer blocks while the queue is full.
Q_SIZE = 200
# Seconds the watcher sleeps between two progress reports.
WAIT = 5
# Listing lines whose 1-based line number is below this value are skipped
# without being checked; a falsy value disables the skip.
CONTINUE = 23549708

# --- Shared state ---------------------------------------------------------
# One slot per worker thread, filled in when the workers are started.
$threads = [nil] * N_THREADS
# Running counters plus the path most recently read from the listing.
$status = { skip: 0, ok: 0, ng: 0, path: nil }
# Bounded queue handing jobs from the listing reader to the workers.
$q = SizedQueue.new(Q_SIZE)
# Start one download worker per slot in $threads.
#
# Each worker pops jobs from $q until it receives nil (the shutdown sentinel
# pushed by the producer). A job is a Hash with:
#   :url  - address to download
#   :md5  - expected MD5 hex digest of the downloaded body
#   :path - local file the body is stored in
#
# Outcome per job:
#   * digest matches   -> body is written to :path, $status[:ok] is bumped
#   * digest mismatch  -> any existing file at :path is removed, $status[:ng]
#                         is bumped
#   * any StandardError (HTTP error, timeout, IO failure) -> reported on
#     stderr and counted as :ng; the worker keeps running. Without this a
#     single failed request would kill the thread, and once every worker has
#     died the producer would block forever on the full queue.
$threads.size.times do |i|
  $threads[i] = Thread.new do
    while (job = $q.pop)
      begin
        # URI#read is provided by open-uri and works on old and new Rubies;
        # Kernel#open no longer accepts URLs as of Ruby 3.0.
        body = URI.parse(job[:url]).read
        if Digest::MD5.hexdigest(body) == job[:md5]
          # mkdir_p is a no-op for existing directories, so no pre-check.
          FileUtils.mkdir_p(File.dirname(job[:path]))
          # Binary mode + write keeps the payload byte-for-byte identical.
          File.open(job[:path], 'wb') { |f| f.write(body) }
          $status[:ok] += 1
        else
          FileUtils.rm(job[:path]) if File.exist?(job[:path])
          $status[:ng] += 1
        end
      rescue StandardError => e
        $status[:ng] += 1
        warn "#{job[:path]}: #{e.class}: #{e.message}"
      end
    end
  end
end
# Progress reporter: while at least one worker thread is alive, sleep WAIT
# seconds and then print one line of the form
#   HH:MM:SS <last listed path> <queue length> [skip, ok, ng per second]/s [skip, ok, ng totals] <lines read>
# Rates are the counter deltas over the sleep, integer-divided by WAIT.
watcher = Thread.new do
  counters = %w{skip ok ng}.map { |name| name.to_sym }
  while $threads.any? { |worker| worker.alive? }
    # Snapshot taken before sleeping so the deltas cover exactly one interval.
    before = $status.clone
    sleep WAIT
    clock = Time.now.iso8601[11..18]
    current_path = $status[:path]
    backlog = $q.size
    rates = counters.map { |key| ($status[key] - before[key]) / WAIT }
    totals = counters.map { |key| $status[key] }
    print "#{clock} #{current_path} #{backlog} #{rates}/s #{totals} #{$count}\n"
  end
end
# Producer: read the gzipped listing line by line and queue one download job
# per tile that still needs fetching, then shut everything down.
#
# $count is the 1-based number of the listing line being processed. Each line
# is split on ',' into path, date, size and md5; only path and md5 are used.
# A line is skipped (and counted in $status[:skip]) when
#   * CONTINUE is set and the line number is still below it (resume point;
#     checked first so no file system work is done for those lines),
#   * it is blank (no path),
#   * the integer value of its first path component is outside Z_EXTENT, or
#   * a local file already exists at that path with a matching MD5.
#
# The block form of GzipReader.open closes the reader once the listing has
# been consumed (the original left it open).
$count = 0
Zlib::GzipReader.open('mokuroku.csv.gz') do |gz|
  gz.each_line do |l|
    $count += 1
    path, _date, _size, md5 = l.strip.split(',')
    $status[:path] = path
    if (CONTINUE && $count < CONTINUE) ||
       path.nil? ||
       !Z_EXTENT.include?(path.split('/')[0].to_i) ||
       (File.exist?(path) && Digest::MD5.file(path).hexdigest == md5)
      $status[:skip] += 1
      next
    end
    url = "http://cyberjapandata.gsi.go.jp/xyz/#{T}/#{path}"
    # Blocks while the queue is full, throttling the reader to the workers.
    $q.push({:url => url, :md5 => md5, :path => path})
  end
end
# One nil per worker: each worker's pop loop ends when it receives nil.
$threads.size.times { $q.push(nil) }
$threads.each { |t| t.join }
watcher.join
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment