Skip to content

Instantly share code, notes, and snippets.

@djipko
Created February 10, 2012 22:18
Show Gist options
  • Save djipko/1793535 to your computer and use it in GitHub Desktop.
Save djipko/1793535 to your computer and use it in GitHub Desktop.
Embedly solution #2
from HTMLParser import HTMLParser
from operator import add
from math import sqrt, pow
import requests
class EmbedlyParser(HTMLParser):
    """Record the nesting depth of every ``<p>`` found inside ``<article>``.

    Attributes:
        depth: current nesting depth; starts at 1 and the ``<article>`` tag
            itself counts as one level, so a ``<p>`` directly inside an
            article is recorded at depth 2.
        ps: list of depths, one entry per ``<p>`` start tag seen while
            inside an article, in document order.
        article: True while the parser is between ``<article>`` and
            ``</article>``.
    """

    # Elements that never have an end tag.  Counting their start tag would
    # raise the depth permanently because no matching decrement ever comes.
    _VOID_TAGS = frozenset([
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ])

    def reset(self):
        """Reset parser state; HTMLParser calls this on instantiation.

        State lives on the instance (not the class) so that separate
        parsers do not share one ``ps`` list.
        """
        # Explicit base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() is not available.
        HTMLParser.reset(self)
        self.depth = 1
        self.ps = []
        self.article = False

    def handle_starttag(self, tag, atrs):
        """Track depth for an opening tag and record ``<p>`` depths."""
        if tag == 'article':
            self.article = True
        if not self.article or tag in self._VOID_TAGS:
            return
        if tag == 'p':
            # <p> is measured, not counted as a nesting level.
            self.ps.append(self.depth)
        else:
            self.depth += 1

    def handle_endtag(self, tag):
        """Undo the depth change made by the matching opening tag."""
        if not self.article or tag in self._VOID_TAGS:
            return
        if tag != 'p':
            self.depth -= 1
        if tag == 'article':
            # Clear the flag only after the decrement above, so the level
            # added by <article> is removed and a later article starts
            # from the same base depth.
            self.article = False
def _population_std_dev(values):
    """Return the population standard deviation of a sequence of numbers.

    Raises:
        ValueError: if ``values`` is empty (the deviation is undefined).
    """
    if not values:
        raise ValueError("cannot compute a deviation of zero values")
    count = float(len(values))
    mean = sum(values) / count
    variance = sum((value - mean) ** 2 for value in values) / count
    return sqrt(variance)


if __name__ == "__main__":
    r = requests.get("http://apply.embed.ly/static/data/2.html")
    # Fail loudly on an HTTP error instead of measuring an error page.
    r.raise_for_status()
    parser = EmbedlyParser()
    # Decoded text is accepted by the parser on both Python 2 and 3.
    parser.feed(r.text)
    dev = _population_std_dev(parser.ps)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("Deviation is %f" % dev)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment