Skip to content

Instantly share code, notes, and snippets.

@ndpar
Created December 13, 2012 12:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ndpar/4276136 to your computer and use it in GitHub Desktop.
Save ndpar/4276136 to your computer and use it in GitHub Desktop.
Analyzing popularity on StackOverflow
import urllib2
import zlib
import json
import pymongo
import sys
# Root of the Stack Overflow API, version 1.1.
URL = 'http://api.stackoverflow.com/1.1'
# Template for the answers-of-a-question endpoint; {0} takes the question id.
ANSWERS = ''.join((URL, '/questions/{0}/answers'))
def load_url(url):
    """Fetch *url* and return its gzip-compressed JSON body as Python data.

    The response body is always inflated as a gzip stream before parsing,
    so a reply that is not gzip-compressed makes this raise ``zlib.error``.

    Args:
        url: Absolute URL to request.

    Returns:
        Whatever ``json.loads`` produces for the decompressed body.
    """
    response = urllib2.urlopen(url)
    try:
        compressed = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    return json.loads(zlib.decompress(compressed, 16 + zlib.MAX_WBITS))
def answers(qid):
    """Return the decoded API response listing the answers to question *qid*."""
    return load_url(ANSWERS.format(qid))
def update(coll, id, answers):
    """Set the ``answers`` field of the document whose ``_id`` equals *id*.

    Args:
        coll: Collection object exposing ``update(spec, document)``.
        id: Value matched against the document's ``_id``.
        answers: Value stored under the ``answers`` key via ``$set``.
    """
    selector = {'_id': id}
    change = {'$set': {'answers': answers}}
    coll.update(selector, change)
def main():
    """Fill in the ``answers`` field of every stored question that lacks it.

    Connects to MongoDB on localhost, walks the documents of
    ``test.stackoverflow`` that have no ``answers`` field, fetches each
    question's answers with ``answers()`` and stores them with ``update()``.
    The first failure stops the run and is reported on standard output;
    it is not re-raised.
    """
    client = pymongo.MongoClient(host='mongodb://localhost:27017', w=1, j=True)
    coll = client.test.stackoverflow
    try:
        pending = coll.find({'answers': {'$exists': False}})
        for question in pending:
            qid = question['_id']
            update(coll, qid, answers(qid)['answers'])
    except Exception:
        # Exception rather than a bare except, so Ctrl-C and SystemExit still
        # terminate the script instead of being reported as write errors.
        # sys.stdout.write keeps the line identical under Python 2 and 3.
        sys.stdout.write(
            'Error trying to write to collection {0!r}\n'.format(sys.exc_info()))
    finally:
        # Always give the connection back, whether or not the run succeeded.
        client.close()
# Run the answers update. The call is unconditional, so it also fires when
# this file is imported rather than executed.
main()
import urllib2
import zlib
import json
import math
import pymongo
import sys
# Root of the Stack Overflow API, version 1.1.
URL = 'http://api.stackoverflow.com/1.1'
# Template for the tag search endpoint; {0} takes the tag, {1} the page number.
TAGGED = ''.join((URL, '/search?tagged={0}&page={1}'))
def load_url(url):
    """Fetch *url* and return its gzip-compressed JSON body as Python data.

    The response body is always inflated as a gzip stream before parsing,
    so a reply that is not gzip-compressed makes this raise ``zlib.error``.

    Args:
        url: Absolute URL to request.

    Returns:
        Whatever ``json.loads`` produces for the decompressed body.
    """
    response = urllib2.urlopen(url)
    try:
        compressed = response.read()
    finally:
        # Release the connection even when the read fails part-way.
        response.close()
    # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
    return json.loads(zlib.decompress(compressed, 16 + zlib.MAX_WBITS))
def load_page(tag, page):
    """Return one decoded page of search results for questions tagged *tag*.

    Args:
        tag: Tag name to search for. It is percent-encoded before being
            placed in the query string, so tags such as ``c#`` or ``c++``
            reach the server intact.
        page: Page number substituted into the ``page`` query parameter.

    Returns:
        The parsed response as returned by ``load_url``.
    """
    # Local import: only this function needs quote().
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    # Unescaped, '#' would start a URL fragment and '&' or '+' would be read
    # as query syntax, silently changing which tag is searched.
    return load_url(TAGGED.format(quote(tag, safe=''), page))
def concat(lists, initializer=None):
    """Concatenate the sequences in *lists* into one, preserving order.

    Args:
        lists: Iterable of lists to join.
        initializer: Optional starting value placed in front of the result.
            When given, it is combined with each element using ``+`` and is
            never modified.

    Returns:
        A new list when *initializer* is omitted. Otherwise the value of
        ``initializer + lists[0] + lists[1] + ...``, which is the
        initializer itself if *lists* is empty.
    """
    if initializer is None:
        # A fresh list per call: a shared ``[]`` default would be handed back
        # to the caller when *lists* is empty and could then be mutated.
        merged = []
        for chunk in lists:
            merged.extend(chunk)  # in place, so the whole join stays linear
        return merged
    combined = initializer
    for chunk in lists:
        combined = combined + chunk
    return combined
def load_pages(tag):
    """Fetch every result page for *tag*, first page included.

    The ``total`` and ``pagesize`` fields of the first response determine
    how many pages exist; pages 2 through the last are then requested one
    at a time, in order.
    """
    pages = [load_page(tag, 1)]
    head = pages[0]
    page_count = int(math.ceil(1. * head['total'] / head['pagesize']))
    for number in range(2, page_count + 1):
        pages.append(load_page(tag, number))
    return pages
def questions(tag):
    """Return the questions from all result pages for *tag* as one sequence."""
    per_page = []
    for page in load_pages(tag):
        per_page.append(page['questions'])
    return concat(per_page)
def insert(coll, records):
    """Save each record into *coll*, keyed by its ``question_id``.

    Every record is modified in place: its ``_id`` is set to its
    ``question_id`` immediately before ``coll.save`` is called on it.

    Args:
        coll: Collection object exposing ``save(document)``.
        records: Iterable of mutable mappings that contain ``question_id``.
    """
    for record in records:
        record['_id'] = record['question_id']
        coll.save(record)
def main(tags):
    """Download the questions for each tag in *tags* and store them in MongoDB.

    Connects to MongoDB on localhost and, for every tag in order, fetches
    its questions with ``questions()`` and saves them into
    ``test.stackoverflow`` with ``insert()``. The first failure stops the
    run and is reported on standard output; it is not re-raised.

    Args:
        tags: Iterable of tag names to download.
    """
    client = pymongo.MongoClient(host='mongodb://localhost:27017', w=1, j=True)
    coll = client.test.stackoverflow
    try:
        for tag in tags:
            insert(coll, questions(tag))
    except Exception:
        # Exception rather than a bare except, so Ctrl-C and SystemExit still
        # terminate the script instead of being reported as write errors.
        # sys.stdout.write keeps the line identical under Python 2 and 3.
        sys.stdout.write(
            'Error trying to write to collection {0!r}\n'.format(sys.exc_info()))
    finally:
        # Always give the connection back, whether or not the run succeeded.
        client.close()
# Download questions for these four tags. The call is unconditional, so it
# also fires when this file is imported rather than executed.
main(['activemq', 'rabbitmq', 'zeromq', 'hornetq'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment