Skip to content

Instantly share code, notes, and snippets.

@adw
Created November 2, 2012 02:08
Show Gist options
  • Save adw/3998231 to your computer and use it in GitHub Desktop.
Save adw/3998231 to your computer and use it in GitHub Desktop.
HOW EXCITED IS DAN SINKER
import lxml.html
import lxml.cssselect
import urllib
import re
def _uppercase_stats(texts):
    """Return ``(total, uppercase, percent)`` character counts for *texts*.

    ``total`` counts every non-whitespace character, ``uppercase`` counts the
    ASCII capitals ``A``-``Z``, and ``percent`` is ``100.0 * uppercase / total``
    (``0.0`` when there are no characters at all).
    """
    # One match per capital letter; unlike "[A-Z]*" this never yields the
    # empty matches that findall would otherwise return at every position.
    uppercase_re = re.compile(r"[A-Z]")
    # split() drops all whitespace, so the word lengths add up to the number
    # of non-whitespace characters.
    total = sum(len(word) for text in texts for word in text.split())
    uppercase = sum(len(uppercase_re.findall(text)) for text in texts)
    # Guard the division: a page with no matching tweets has zero characters.
    percent = (100.0 * uppercase) / total if total else 0.0
    return total, uppercase, percent


def main(victim):
    """Print how much of the tweet text found at *victim* is uppercase.

    ``victim`` is handed straight to ``lxml.html.parse``. Every
    ``p.js-tweet-text`` element in the parsed document is reduced to its text
    content, non-ASCII characters are dropped, and one line is printed with
    three space-separated values: the number of non-whitespace characters,
    the number of ASCII uppercase characters, and the uppercase percentage.
    If no text is found, the percentage is reported as ``0.0`` instead of
    raising ``ZeroDivisionError``.
    """
    select_tweets = lxml.cssselect.CSSSelector('p.js-tweet-text')
    tweets = select_tweets(lxml.html.parse(victim))
    # encode(..., "ignore") discards non-ASCII characters; decoding again
    # keeps the value a text string so the str regex applies on Python 2
    # and Python 3 alike.
    tweet_text = [
        lxml.html.tostring(tweet, encoding="unicode", method="text")
        .encode("ascii", "ignore")
        .decode("ascii")
        .strip()
        for tweet in tweets
    ]
    total_chars, uppercase_chars, percent = _uppercase_stats(tweet_text)
    # A single pre-formatted string prints identically whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("%s %s %s" % (total_chars, uppercase_chars, percent))
if __name__ == "__main__":
    import sys

    # Default page to measure. An optional first command-line argument
    # overrides it, so the script can be pointed at another page without
    # editing the source; with no argument the behaviour is unchanged.
    VICTIM = "http://twitter.com/dansinker"
    main(sys.argv[1] if len(sys.argv) > 1 else VICTIM)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment