Skip to content

Instantly share code, notes, and snippets.

@Radagaisus
Created December 22, 2011 12:43
Show Gist options
  • Save Radagaisus/1510177 to your computer and use it in GitHub Desktop.
Save Radagaisus/1510177 to your computer and use it in GitHub Desktop.
Correlation between NYT Headlines and Baby Names (there is none)
#!/usr/bin/env python
# encoding: utf-8
"""
Count how many New York Times headlines mention each name from the
baby names list in each year, and look for a correlation between a
name's rank and its number of headline mentions.
"""
import urllib
import json
import re
import time
import numpy as np
from itertools import imap
# scumbag scipy
def pearsonr(x, y):
    """Return the Pearson correlation coefficient of two numeric sequences.

    The coefficient is computed from raw sums (sum of values, sum of squares
    and sum of pairwise products) rather than from mean-centred deviations.

    Args:
        x: Sequence of numbers.
        y: Sequence of numbers. Assumed to have the same length as ``x``;
            pairwise products stop at the shorter of the two.

    Returns:
        The correlation as a float, or ``0`` when it is undefined: ``x`` is
        empty, or either sequence has no variance.
    """
    n = len(x)
    if n == 0:
        # No samples: every "/ n" below would divide by zero.
        return 0

    sum_x = float(sum(x))
    sum_y = float(sum(y))
    sum_x_sq = sum(a * a for a in x)
    sum_y_sq = sum(b * b for b in y)
    psum = sum(a * b for a, b in zip(x, y))

    num = psum - (sum_x * sum_y / n)
    var_x = sum_x_sq - sum_x * sum_x / n
    var_y = sum_y_sq - sum_y * sum_y / n

    # Floating-point rounding can push a zero variance slightly below zero;
    # taking the square root of a negative product would raise (Python 2) or
    # yield a complex number (Python 3), so treat it as "no variance".
    if var_x <= 0 or var_y <= 0:
        return 0

    den = (var_x * var_y) ** 0.5
    if den == 0:
        # The product of two tiny variances may underflow to zero.
        return 0
    return num / den
def main():
    """Correlate baby-name rank with New York Times headline counts.

    Reads ``names.txt``, in which every 50th line (starting with the first)
    is a year header and every other line holds three whitespace-separated
    fields: a rank and two names. For each such line the second field is
    searched for in NYT article titles for the current year, and the rank
    and the reported ``total`` are recorded. Finally the Pearson correlation
    between ranks and totals is printed.

    Progress is reported on stdout. ``print`` is called with a single
    parenthesised argument throughout so the output is the same whether the
    interpreter treats ``print`` as a statement or as a function.
    """
    api_key = "your_api_key"
    name_ranks = []
    results = []

    # Open the list and start iterating. Expected layout:
    #   2011:
    #   1 male female
    with open('names.txt', 'r') as names:
        year = 0
        for line, name in enumerate(names):
            if line % 50 == 0:
                # NOTE(review): the layout sketch above shows a trailing ':'
                # on the year line; if the real file has one it ends up in
                # the begin_date/end_date parameters below -- confirm.
                year = name.rstrip()
                print(year)
                continue

            fields = name.split()
            if len(fields) != 3:
                # Blank or malformed line: nothing to query. The line still
                # counts towards the 50-line year cadence via enumerate().
                print("Skipping malformed line: " + name.rstrip())
                continue
            # The third field is part of the file format but is not used.
            ranking, name, _unused = fields

            print("Getting number of headlines for the name " + name + " for the year " + year)
            # Escape the name so it cannot break the query string.
            url = "http://api.nytimes.com/svc/search/v1/article?format=json&query=title%3A" + urllib.quote(name) + "&begin_date=" + year + "0101&end_date=" + year + "1231&api-key=" + api_key

            # Call NYT
            print("Request sent to The New York Times")
            data = urllib.urlopen(url)
            try:
                # Convert JSON
                print("Response Received...")
                print("Parsing results...")
                result = json.loads(data.read())
            finally:
                # Always release the connection, even if parsing fails.
                data.close()

            # Parse results
            total = result.get('total')
            print("Rank: " + ranking + ", Results: " + str(total))
            if total is not None:
                # A response without 'total' carries no count; recording
                # None would make the correlation step fail at the very end.
                name_ranks.append(int(ranking))
                results.append(total)

            print("Going to sleep.\n---------------------\n")
            # We have a rate limit of 10 calls per second
            time.sleep(0.2)

    print("correlation: ")
    print(pearsonr(name_ranks, results))
# Run the analysis only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment