| import bs4 | |
| import datetime as dt | |
def parseDate(comment):
    """Return the timestamp of a LessWrong comment div, or None when hidden.

    Parameters
    ----------
    comment : bs4.Tag
        A ``div.comment`` element from a LessWrong thread page.

    Returns
    -------
    datetime.datetime or None
        The parsed comment date, or None for comments collapsed as
        "comment score below threshold" (those carry no date span).

    Raises
    ------
    IndexError, AttributeError
        Re-raised when the expected markup is missing for any reason
        other than a below-threshold collapsed comment.
    """
    entry = comment.findChildren("div", class_="entry", recursive=False)[0]
    meta = entry.findChildren("div", class_="comment-meta", recursive=False)[0]
    try:
        raw = meta.findChildren("span", class_="comment-date")[0].contents[0].strip()
    except (IndexError, AttributeError):
        # Narrowed from a bare except: only missing/odd markup is expected
        # here; never swallow KeyboardInterrupt/SystemExit.
        if "comment score below threshold" in meta.text:
            return None
        raise
    # Example site format: "02 March 2015 03:04:05PM"
    return dt.datetime.strptime(raw, "%d %B %Y %I:%M:%S%p")
def dropNone(ls):
    """Return a list of the items in *ls* that are not None.

    Uses an identity check (``is not None``) rather than equality, so
    falsy-but-valid values (0, empty strings) are kept and objects with
    unusual ``__eq__`` cannot be dropped by mistake.
    """
    return [x for x in ls if x is not None]
def pageDate(soup):
    """Extract the thread's creation time from the page header metadata.

    Looks up the first ``span.date`` inside the first ``div.meta`` and
    parses its text, e.g. "02 March 2015 03:04PM".
    """
    meta_div = soup.findAll("div", class_="meta")[0]
    date_span = meta_div.findAll("span", class_="date")[0]
    stamp = date_span.contents[0].strip()
    return dt.datetime.strptime(stamp, "%d %B %Y %I:%M%p")
def parsePageToplevel(soup):
    """Minutes elapsed between thread creation and each top-level comment.

    Only direct children of the comment table are considered
    (``recursive=False``), so replies are excluded. Comments whose date
    cannot be read (below-threshold) are dropped.
    """
    comments_div = soup.findAll("div", id="comments")[0]
    table = comments_div.findChildren("div", class_="sitetable", recursive=False)[0]
    top_comments = table.findChildren("div", class_="comment", recursive=False)
    created = pageDate(soup)
    dates = dropNone(map(parseDate, top_comments))
    return map(lambda d: (d - created).total_seconds() / 60, dates)
def parsePageTotal(soup):
    """Minutes elapsed between thread creation and every comment on the page.

    Unlike :func:`parsePageToplevel`, this walks the comment table
    recursively (``recursive=True``), so nested replies are included.
    Comments whose date cannot be read (below-threshold) are dropped.
    """
    comments_div = soup.findAll("div", id="comments")[0]
    table = comments_div.findChildren("div", class_="sitetable", recursive=False)[0]
    all_comments = table.findChildren("div", class_="comment", recursive=True)
    created = pageDate(soup)
    dates = dropNone(map(parseDate, all_comments))
    return map(lambda d: (d - created).total_seconds() / 60, dates)
| import matplotlib.pyplot as plt | |
| from pandas import Series | |
def plotSeries(timedeltas, multiplier=1, name=None):
    """Plot cumulative comment count against time-since-posting.

    Parameters
    ----------
    timedeltas : iterable of float
        Minute offsets of comments from thread creation. Any iterable is
        accepted; the original required a mutable list.
    multiplier : float
        Scale applied to each cumulative count (e.g. 1/n to average over
        n threads).
    name : str or None
        Legend label passed through to ``Series.plot``.

    Notes
    -----
    Uses ``sorted()`` to work on a copy — the original sorted the
    caller's list in place, a surprising side effect.
    """
    ordered = sorted(timedeltas)
    counts = [(i + 1) * multiplier for i in range(len(ordered))]
    s = Series(counts, ordered)
    s.plot(label=name)
| import mechanize | |
# Scrape the most recent LessWrong open threads and plot how quickly
# comments accumulate after each thread is created.
root = "http://lesswrong.com/r/discussion/tag/open_thread/"
browser = mechanize.Browser()
browser.open(root)
listing = bs4.BeautifulSoup(browser.response())
n = 20  # number of open-thread listings to sample
# Thread links live in <h2 itemprop="name"> headers on the listing page;
# the first child anchor holds the relative href.
headers = [h for h in listing.findAll("h2") if h.get("itemprop") == "name"][:n]
links = ["http://lesswrong.com/" + h.findChildren()[0]["href"] for h in headers]
pages = [mechanize.urlopen(url).read() for url in links]
soups = [bs4.BeautifulSoup(page) for page in pages]
totals = []
tops = []
# Explicit loop instead of map()-for-side-effect: the original relied on
# Python 2's eager map and silently does nothing under Python 3.
for soup in soups:
    totals.extend(parsePageTotal(soup))
    tops.extend(parsePageToplevel(soup))
# Bug fix: plt.figure() does not accept xlabel/ylabel keyword arguments
# (they are not Figure properties and would raise); set axis labels via
# the dedicated pyplot calls instead.
plt.figure()
plt.xlabel("Time since thread created (minutes)")
plt.ylabel("Number of comments")
plotSeries(tops, 1. / n, name="Top level comments")
plotSeries(totals, 1. / n, name="Total comments")
# The label= values set above are only shown once a legend is requested.
plt.legend()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
