Created
December 29, 2011 19:01
-
-
Save zmcghee/1535647 to your computer and use it in GitHub Desktop.
Breakdown of movies released commercially in NYC since '98
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1998: 471
1999: 472
2000: 461
2001: 453
2002: 489
2003: 480
2004: 549
2005: 592
2006: 643
2007: 647
2008: 645
2009: 607
2010: 655
2011: 786
Average: 568
Source: http://www.panix.com/~dangelo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count the movies listed per year on the panix.com NYC release pages.

For each yearly master page the script strips the layout markup, counts the
remaining lines (one line per listed release), prints ``YEAR: COUNT`` and
finally prints the rounded average across all years.
"""
import contextlib
import re

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# Two-digit suffixes of the archived pages; the empty suffix is the page
# without a year in its name, which is labelled CURRENT_YEAR_LABEL.
YEAR_SUFFIXES = ['98', '99', '00', '01', '02', '03', '04', '05', '06', '07',
                 '08', '09', '10', '']
CURRENT_YEAR_LABEL = "2011"
PAGE_URL_TEMPLATE = "http://www.panix.com/~dangelo/nymaster%s.html"

# Everything before the first week header is page preamble and is discarded.
_FIRST_WEEK_MARKER = '<b>Week of'

# Layout-only lines, removed in this exact order so that only listing lines
# remain.  Order matters: blank lines are collapsed first so the separator
# patterns below see their neighbours on adjacent lines.
_CLEANUP_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r"\n\n",
    r"(\n|^)<br> <hr>\n",
    r"\n<hr>\n",
    r"\n<br>\n",
    r"\n<b>Week of.*\n",
))


def fetch_page(suffix):
    """Download the master page for *suffix* and return it as text.

    The response is always closed, even if reading fails.  The bytes are
    decoded as latin-1, which accepts every byte value; only ASCII markup
    and newlines are inspected afterwards.
    """
    url = PAGE_URL_TEMPLATE % suffix
    with contextlib.closing(urlopen(url)) as response:
        return response.read().decode("latin-1")


def count_releases(html):
    """Return the number of listing lines in one master page.

    Raises:
        ValueError: if the page has no ``<b>Week of`` header, or nothing
            follows the first header line (the page layout is not the
            expected one).
    """
    _, marker, body = html.partition(_FIRST_WEEK_MARKER)
    if not marker:
        raise ValueError("page contains no %r header" % _FIRST_WEEK_MARKER)

    for pattern in _CLEANUP_PATTERNS:
        body = pattern.sub("\n", body)

    # Drop the remainder of the first week-header line ...
    _, newline, body = body.partition("\n")
    if not newline:
        raise ValueError("page has no listing after the first week header")
    # ... and whatever follows the final newline.
    body = body.rsplit("\n", 1)[0]
    return len(body.split("\n"))


def year_label(suffix):
    """Expand a two-digit page suffix to the four-digit year it stands for."""
    if suffix.startswith('9'):
        return "19" + suffix
    if suffix != '':
        return "20" + suffix
    return CURRENT_YEAR_LABEL


def rounded_average(counts):
    """Return the mean of non-negative integer *counts*, halves rounded up.

    Integer arithmetic is used so the result is identical on Python 2 and
    Python 3 (whose ``round`` treat exact halves differently).
    """
    total = sum(counts)
    size = len(counts)
    return (2 * total + size) // (2 * size)


def main():
    """Print the per-year counts, their average, and the data source."""
    counts = []
    for suffix in YEAR_SUFFIXES:
        count = count_releases(fetch_page(suffix))
        print("%s: %s" % (year_label(suffix), count))
        counts.append(count)
    print("")
    print("Average: %s" % rounded_average(counts))
    print("")
    print("Source: http://www.panix.com/~dangelo")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment