Skip to content

Instantly share code, notes, and snippets.

@daviddesancho
Created July 25, 2014 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daviddesancho/04efa74ca14741cb69a9 to your computer and use it in GitHub Desktop.
Save daviddesancho/04efa74ca14741cb69a9 to your computer and use it in GitHub Desktop.
import sys, os
import json
import urllib2
import tempfile
import multiprocessing as mp
def worker(url):
    """Download a JSON document and dump its items to a temporary file.

    The document at ``url`` is fetched and parsed as JSON.  The parsed value
    is then iterated and every item is written on its own line (formatted
    with ``%s``) to a freshly created temporary file.  Note that iterating a
    JSON object (a dict) yields its keys only.

    Args:
        url: Address of the JSON resource to fetch.

    Returns:
        Path of the temporary file that was written.  The file is not
        deleted here; cleaning it up is the caller's responsibility.

    Raises:
        urllib2.URLError: If the resource cannot be retrieved.
        ValueError: If the payload is not valid JSON.
    """
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement.
    print("# Process %s running on file %s" % (mp.current_process(), url))

    response = urllib2.urlopen(url)
    try:
        raw = json.load(response)
    finally:
        # Release the connection even when parsing fails.
        response.close()

    # mkstemp() hands back an already-open OS-level descriptor.  Wrap that
    # descriptor instead of opening the path a second time, so that neither
    # the descriptor nor a second file object is leaked, and so the data is
    # flushed to disk before the path is returned.
    fd, tmpath = tempfile.mkstemp()
    with os.fdopen(fd, "w") as out:
        for r in raw:
            out.write("%s\n" % r)
    return tmpath
# example urls to read
url = "http://eu.battle.net/auction-data/258993a3c6b974ef3e6f22ea6f822720/auctions.json"
# The same URL repeated 76 times, as a flat tuple, to give the pool a batch
# of independent downloads to distribute.
urls_to_parse = (url,) * 76

# Guard the entry point: on platforms where child processes re-import this
# module, an unguarded Pool creation would recurse into spawning more pools.
if __name__ == "__main__":
    # multiprocessing options: one worker process per available CPU
    nproc = mp.cpu_count()
    pool = mp.Pool(processes=nproc)
    try:
        # temps holds the temporary file path returned for each URL, in the
        # same order as urls_to_parse.
        temps = pool.map(worker, urls_to_parse)
    finally:
        # Always shut the pool down so worker processes do not linger.
        pool.close()
        pool.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment