Created
June 4, 2021 07:02
-
-
Save jimmygle/02fcbe2162771f2bd67976b2326238ee to your computer and use it in GitHub Desktop.
Sloppy demonstration of multithreaded HTTP requests in Ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
require 'thread' | |
require 'uri' | |
require 'net/http' | |
require 'json' | |
require 'pry' # binding.pry | |
##
## Sloppy script I threw together to demonstrate multithreaded HTTP requests
##
# Path of the source dataset CSV (must have a header row); default input of
# load_locales.
ZIPS_CSV = './data/uszips.csv'.freeze
# Path the results CSV is written to.
OUTPUT_CSV = './results.csv'.freeze
# Default number of worker threads used by check_zips.
THREAD_COUNT = 10
# Loads the source dataset CSV and converts every data row into a hash.
#
# The file must start with a header row; columns are looked up by header
# name, and a column that is missing from the file yields nil for its key.
#
# @param file [String] path of the CSV file to read (defaults to ZIPS_CSV)
# @return [Array<Hash>] one hash per data row with the keys :zip, :city,
#   :county, :state, :pop, :density and :timezone
def load_locales(file: ZIPS_CSV)
  # CSV.foreach reads the file row by row instead of loading the whole file
  # into a single string before parsing.
  CSV.foreach(file, headers: true).map do |row|
    {
      zip: row['zip'],
      city: row['city'],
      county: row['county_names_all'],
      state: row['state_name'],
      pop: row['population'],
      density: row['density'],
      timezone: row['timezone']
    }
  end
end
# Requests the service URL for one ZIP code and reports whether the JSON
# response carries an 'identifier' field.
#
# Prints one status line per request to stdout (ZIP, YES/NO, HTTP status code
# and URL). Network-level errors raised by Net::HTTP are not rescued here.
#
# NOTE(review): the host in the URL below is a literal "..." placeholder in
# this script; substitute the real endpoint before running.
#
# @param zip [String] ZIP code interpolated into the request path
# @return [Boolean] true when the response body is a JSON object whose
#   'identifier' value is non-nil; false otherwise, including when the body is
#   missing, is not valid JSON, or is not a JSON object
def fetch_result(zip)
  url = URI("https://.../#{zip}")
  https = Net::HTTP.new(url.host, url.port)
  https.use_ssl = true

  request = Net::HTTP::Get.new(url)
  request["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
  request["referer"] = "https://www.google.com/"
  request["pragma"] = "no-cache"
  request["origin"] = "https://www.google.com"
  request["accept"] = "*/*"
  # NOTE(review): an earlier revision also set a header with an empty name and
  # empty value here, apparently a redacted placeholder. An empty field name
  # is not a valid HTTP header, so it is no longer sent; add any
  # service-specific header at this point.

  response = https.request(request)

  # A non-JSON body (e.g. an HTML error page) or a missing body simply means
  # "no identifier"; it must not abort the worker thread that called us.
  parsed = begin
    JSON.parse(response.body)
  rescue JSON::ParserError, TypeError
    nil
  end
  is_servicing = parsed.is_a?(Hash) && !parsed['identifier'].nil?

  puts "#{zip} #{(is_servicing ? 'YES' : 'NO ')} [ #{response.code} #{url} ]"
  is_servicing
end
# Checks every locale concurrently with a fixed number of worker threads.
#
# Each worker repeatedly takes a locale hash off a shared queue, stores the
# value of fetch_result(loc[:zip]) under loc[:check], and appends the hash to
# the result list. The locales array passed in is not modified (its hashes
# gain the :check key). Locales are handed out in input order; the order of
# the returned list depends on which requests finish first.
#
# @param thread_count [Integer] number of worker threads to start
# @param locales [Array<Hash>] locale hashes, each providing a :zip entry
# @return [Array<Hash>] the same hash objects, each with :check set
def check_zips(thread_count: THREAD_COUNT, locales:)
  pending = Queue.new
  locales.each { |loc| pending << loc }
  # Once closed, #pop returns nil when the queue is drained, which ends each
  # worker's loop without blocking.
  pending.close

  results = []
  results_lock = Mutex.new

  workers = Array.new(thread_count) do
    Thread.new do
      while (loc = pending.pop)
        loc[:check] = fetch_result(loc[:zip])
        results_lock.synchronize { results << loc }
      end
    end
  end

  # join re-raises an exception that terminated a worker.
  workers.each(&:join)
  results
end
# Writes the checked locales to a CSV file.
#
# The header row is taken from the keys of the first result; every following
# row lists that result's values in header order, so a hash whose keys are in
# a different order still lands in the right columns (a key it lacks becomes
# an empty cell). With no results the file is created empty instead of
# raising.
#
# @param results [Array<Hash>] result hashes to write
# @param path [String] destination file (defaults to OUTPUT_CSV); an existing
#   file is overwritten
# @return [Array<Hash>] the results argument
def results_to_csv(results, path: OUTPUT_CSV)
  columns = results.empty? ? [] : results.first.keys

  CSV.open(path, "wb") do |csv|
    csv << columns.map(&:to_s) unless results.empty? # header
    results.each { |res| csv << res.values_at(*columns) }
  end
end
# Script body: load the dataset, check every ZIP code, then write the results
# CSV.
results_to_csv(check_zips(locales: load_locales))
# Single-ZIP call kept commented out (apparently for manual testing):
#fetch_result('20001')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment