Radagaisus/nyt.py

## nyt.py
#!/usr/bin/env python
# encoding: utf-8
"""
Count how many New York Times headlines mention each name
from the baby names list in each year, then look for a
correlation between a name's rank and its number of mentions.
"""

import urllib
import json
import re
import time
import numpy as np
from itertools import imap

# scumbag scipy
def pearsonr(x, y):
  """Return the Pearson correlation coefficient between x and y.

  Computed with the sum-based formula

      r = (Sxy - Sx*Sy/n) / sqrt((Sxx - Sx^2/n) * (Syy - Sy^2/n))

  where Sx, Sy are the sums, Sxx, Syy the sums of squares and Sxy the sum
  of pairwise products.

  Args:
    x: Sequence of numbers.
    y: Sequence of numbers. Assumed to have the same length as x; this is
      not checked, and n is taken from len(x).

  Returns:
    The coefficient as a float, or 0 when it is undefined: empty input, or
    at least one of the sequences has no variance.
  """
  n = len(x)
  if n == 0:
    # Nothing to correlate; return the same "undefined" value as the
    # zero-variance case rather than dividing by zero below.
    return 0

  sum_x = float(sum(x))
  sum_y = float(sum(y))
  sum_x_sq = sum(v * v for v in x)
  sum_y_sq = sum(v * v for v in y)
  # zip() pairs the values positionally and stops at the shorter sequence.
  psum = sum(a * b for a, b in zip(x, y))

  num = psum - (sum_x * sum_y / n)
  radicand = (sum_x_sq - sum_x * sum_x / n) * (sum_y_sq - sum_y * sum_y / n)
  # Floating-point rounding can push a (near-)zero variance product slightly
  # below zero; taking its square root would fail, so treat it as zero.
  if radicand <= 0:
    return 0
  return num / radicand ** 0.5

def main():
	"""Correlate baby-name rank with New York Times headline counts.

	Reads ``names.txt``. Every 50th line, starting with the first, is taken
	as the year; each other line is split on whitespace into three fields:
	rank, name, and a third field that is read but not used. For every name
	line the NYT article search API is queried for articles whose title
	matches the name within that year, and the rank and the response's
	``total`` are recorded. When the file is exhausted, the Pearson
	correlation between the recorded ranks and totals is printed.

	Blank lines are skipped. A response without a ``total`` field is
	reported and left out of the correlation.
	"""
	# Function-scope import: used only to guarantee the HTTP response is closed.
	from contextlib import closing

	api_key = "your_api_key"

	year = 0
	name_ranks = []
	results = []

	# Open the list and start iterating
	# 2011:
	# 1 male female
	with open('names.txt', 'r') as names:
		for line, text in enumerate(names):
			if line % 50 == 0:
				# Header line of a block: holds the year.
				year = text.rstrip()
				print(year)
				continue

			fields = text.split()
			if not fields:
				# Blank line (e.g. trailing newline at end of file).
				continue
			ranking, name, _unused = fields

			print("Getting number of headlines for the name " + name + " for the year " + year)
			# Escape the name so it cannot break the query string.
			url = ("http://api.nytimes.com/svc/search/v1/article?format=json&query=title%3A"
				+ urllib.quote(name)
				+ "&begin_date=" + year + "0101&end_date=" + year
				+ "1231&api-key=" + api_key)

			# Call NYT
			print("Request sent to The New York Times")
			with closing(urllib.urlopen(url)) as data:
				payload = data.read()

			# Convert JSON
			print("Response Received...")
			print("Parsing results...")
			result = json.loads(payload)

			# Parse results
			total = result.get('total')
			print("Rank: " + ranking + ", Results: " + str(total))

			if total is None:
				# Keep the two lists aligned and numeric; a None here would
				# make the correlation step fail.
				print("No total in response, skipping " + name)
			else:
				name_ranks.append(int(ranking))
				results.append(total)

			print("Going to sleep.\n---------------------\n")
			# We have a rate limit of 10 calls per second
			time.sleep(0.2)

	print("correlation: ")
	print(pearsonr(name_ranks, results))
# Script entry point: run the job only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
	main()