Skip to content

Instantly share code, notes, and snippets.

@takuma-saito
Last active May 2, 2020 13:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takuma-saito/f81fa3e48005175e960c9b46723efa8b to your computer and use it in GitHub Desktop.
Save takuma-saito/f81fa3e48005175e960c9b46723efa8b to your computer and use it in GitHub Desktop.
worker_get_url.rb
# Gem source used to resolve the dependency declared below.
source 'https://rubygems.org'
# Loaded by the worker script (require 'open_uri_redirections'); the script
# passes its :allow_redirections option when opening each URL.
gem 'open_uri_redirections'
#!/bin/bash -xe
# Print every absolute http(s) link found in href="..." attributes of a page.
#
# Usage: <this script> URL
#   $1  URL of the page to download with curl.
#
# Pipeline stages:
#   1. grep -o emits each href="..." attribute on its own line. [^"]* stops at
#      the closing quote; POSIX ERE has no lazy quantifier, so the former
#      (.*?) matched through to the last quote on the line.
#   2. cut takes the text between the first pair of double quotes, so URLs
#      containing '=' (query strings) are kept intact instead of being split.
#   3. The final grep keeps only absolute http:// and https:// links.
curl "$1" |
  grep -oE 'href="[^"]*"' |
  cut -d '"' -f 2 |
  grep -E '^https?://'
require 'thread'
require 'open-uri'
require 'open_uri_redirections'
# A fixed-size pool of threads that execute queued blocks.
#
# Each job is called with two arguments: the id of the worker running it and
# the value returned by the previous job that same worker ran (nil for the
# worker's first job).
class Workers
  # Starts +count+ worker threads, all consuming from one shared queue.
  #
  # @param count [Integer] number of worker threads to start
  def initialize(count)
    @q = Queue.new
    @count = count
    @jobs = (0...@count).map do |worker_id|
      Thread.new { drain(worker_id) }
    end
  end

  # Enqueues the given block as a job for the next idle worker.
  def push(&proc)
    @q << proc
  end

  # Sends one stop marker per worker, then blocks until every worker has
  # finished. An exception raised inside a job is re-raised here when that
  # worker is joined. The queue is closed in every case.
  #
  # @return [Array<Thread>] the worker threads
  def wait
    @count.times { @q << nil }
    # Thread#value joins the thread before returning its result.
    @jobs.each(&:value)
  ensure
    @q.close
  end

  private

  # Worker loop: runs jobs until a nil stop marker is popped, feeding each
  # job the result of the one before it.
  def drain(worker_id)
    total = nil
    while (task = @q.pop)
      total = task.call(worker_id, total)
    end
  end
end
# Fetches +uri+ and prints its body size and HTTP status to stdout.
#
# Uses URI.open rather than Kernel#open: Kernel#open stopped handling URLs in
# Ruby 3.0. The block form closes the response IO (which may be a Tempfile
# for larger bodies) as soon as it has been read.
#
# Any StandardError is reported with warn and the fetch is retried, for at
# most five attempts in total. An OpenURI::HTTPRedirect is followed manually,
# for at most ten hops, so a redirect cycle cannot loop forever.
#
# @param uri [String, URI] the URL to fetch; expected to be an http(s) URL
# @return [nil]
def run(uri)
  try = 0
  redirects = 0
  begin
    puts "open: #{uri}"
    URI.open(uri, allow_redirections: :safe) do |res|
      puts "success: #{uri} #{res.read.size} #{res.status}"
    end
  rescue OpenURI::HTTPRedirect => redirect
    redirects += 1
    if redirects > 10
      warn "warn: too many redirects: #{uri}\n"
      return nil
    end
    uri = redirect.uri
    p uri
    retry
  rescue => e
    try += 1
    warn "warn: #{try} #{e.message}\n"
    retry if try <= 4
  end
end
# Read URLs from stdin, one per line, and fetch them on a pool of 60 worker
# threads.
w = Workers.new(60)
# Stream line by line so workers can start before stdin reaches EOF.
$stdin.each_line do |line|
  # strip also removes the "\r" left behind by CRLF line endings.
  uri = line.strip
  # A blank line would otherwise be queued and burn five failing attempts.
  next if uri.empty?

  w.push { run(uri) }
end
# Block until every queued URL has been processed.
w.wait
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment