Skip to content

Instantly share code, notes, and snippets.

@daviddesancho
Created July 25, 2014 10:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daviddesancho/4815fa48ea49e7e49691 to your computer and use it in GitHub Desktop.
import sys
import json
import urllib2
import multiprocessing as mp
def worker(url):
    """Download the JSON document at *url* and return it as a dict.

    Runs inside a ``multiprocessing`` pool worker; prints which process
    is handling which URL, then fetches and parses the document.

    :param url: URL of a JSON document (any scheme ``urlopen`` accepts).
    :returns: a new dict holding a shallow copy of the parsed top-level
        JSON object (the original copied it key by key).
    :raises: whatever ``urlopen``/``json.load`` raise on network or
        parse errors — callers see them surface from ``pool.map``.
    """
    # Local import shim: the file-level import is the Python-2-only
    # urllib2; fall back to it only when urllib.request is unavailable.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2 fallback
    # Parenthesized form works as a statement on Py2 and a call on Py3.
    print("# Process %s running on file %s" % (mp.current_process(), url))
    response = urlopen(url)
    try:
        raw = json.load(response)
    finally:
        # Bug fix: the original never closed the response, leaking the
        # connection for every URL processed.
        response.close()
    # Same result as the original key-by-key loop: a shallow copy.
    return dict(raw)
# Example URLs to read: the same demo document fetched repeatedly to
# exercise the pool (102 repeats, matching the original literal tuple —
# TODO confirm count against the original if it matters).
url = "http://eu.battle.net/auction-data/258993a3c6b974ef3e6f22ea6f822720/auctions.json"
urls_to_parse = (url,) * 102

if __name__ == "__main__":
    # The guard is required: with spawn-based start methods (Windows,
    # macOS on modern Pythons) child processes re-import this module,
    # and an unguarded Pool would recursively spawn workers.
    nproc = mp.cpu_count()  # one worker per CPU core
    pool = mp.Pool(processes=nproc)
    try:
        # Blocks until every URL has been fetched and parsed.
        parsed = pool.map(worker, urls_to_parse)
    finally:
        pool.close()
        pool.join()  # bug fix: original closed the pool but never joined it
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment