Skip to content

Instantly share code, notes, and snippets.

@chatman
Created August 8, 2020 14:27
Show Gist options
  • Save chatman/8dfc265eb6d13d2a20bb38948fdc112d to your computer and use it in GitHub Desktop.
Save chatman/8dfc265eb6d13d2a20bb38948fdc112d to your computer and use it in GitHub Desktop.
# This program is the equivalent of this shell command, but using multiple threads
# for i in {14566365..24090104}; do wget -q https://hacker-news.firebaseio.com/v0/item/$i.json; done
import threading
import urllib2
import time
def fetch_url(url):
    """Download one Hacker News item and save it as ``<id>.json``.

    Despite the parameter name, ``url`` is the item ID (anything that
    ``str()`` turns into the ID); the request URL is built from it here.
    The response body is written unchanged to ``<id>.json``, a relative
    path, so it lands in the current working directory.

    Errors raised by ``urllib2.urlopen`` or by the file write are not
    caught and propagate to the caller (or terminate the calling thread).
    """
    item_id = str(url)
    response = urllib2.urlopen(
        "https://hacker-news.firebaseio.com/v0/item/" + item_id + ".json")
    try:
        payload = response.read()
    finally:
        # The Python 2 urlopen result is not a context manager, so close it
        # explicitly; otherwise every call leaks a socket until GC runs.
        response.close()
    # Binary mode: the payload is raw bytes from the wire and must be stored
    # without newline translation or text encoding.
    with open(item_id + ".json", "wb") as out_file:
        out_file.write(payload)
# First and last item IDs to download (both inclusive, matching the shell
# brace range in the header comment) and the number of IDs per batch.
begin = 14566365
end = 24090104
gap = 5000

# Process the ID range in batches of `gap`, one thread per ID, waiting for a
# whole batch to finish before starting the next one.
for w in range(begin, end + 1, gap):
    start = time.time()
    # Clamp the final batch so it stops at `end` instead of running up to
    # gap - 1 IDs past it.
    urls = range(w, min(w + gap, end + 1))
    print("Started fetching " + str(w))
    threads = [threading.Thread(target=fetch_url, args=(url,)) for url in urls]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    # Parenthesised single-argument form prints the same text whether
    # `print` is a statement or a function.
    print("Elapsed Time: %s" % (time.time() - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment