Claire Willett gitabites

## nyt_mostpop.py
import requests
import json
import csv

# Goal of this script: retrieve the most-shared NYT articles from the past
# 30 days and determine which author has contributed the most of them.
# NOTE(review): the URL carries a literal "[your-key]" placeholder --
# presumably it must be replaced with a real NYT API key before running.
pop_request = 'http://api.nytimes.com/svc/mostpopular/v2/mostshared/all-sections/30.json?api-key=[your-key]'
# Hit the most-shared endpoint and keep the whole response object. Without a
# timeout, requests waits indefinitely on an unresponsive server, so bound
# the wait (in seconds) to keep the script from hanging.
pop = requests.get(pop_request, timeout=30)

#store just the content

## perac_2.py
# modified from http://brianabelson.com/open-news/2013/12/17/scrape-the-gibson.html

import requests
from bs4 import BeautifulSoup
from pprint import pprint
from urlparse import urljoin
import dataset
import csv
import pandas as pd

## Main_Skim_Date_Time
# Main vs skim emails: count the documents in each category.
# NOTE(review): `db` is not defined in this snippet -- presumably a MongoDB
# database handle created elsewhere; confirm. If it is PyMongo, note that
# Cursor.count() is deprecated in newer releases -- TODO verify the version.
mainboxCount = db.emails.find({"category": "main"}).count()
# A parenthesised single-argument print behaves the same as a Python 2
# print statement and is also valid as a Python 3 function call.
print("Number of emails in mainbox: " + str(mainboxCount))

skimboxCount = db.emails.find({"category": "skim"}).count()
print("Number of emails in skimbox:" + str(skimboxCount))

# Main emails received on a given day of the week: match the day
# abbreviation ("Mon") against the stored Date header with a regex.
mainMonday = db.emails.find({"headers.Date": {"$regex": "Mon"}, "category": "main"}).count()
print("Number of mainbox emails received on Mondays: " + str(mainMonday))

## Reclassification queries
# Number of reclassifications, cumulative: count recategorization records
# by their destination category.
# NOTE(review): `db` is not defined in this snippet -- presumably a MongoDB
# database handle created elsewhere; confirm.
skimToMain = db.recategorization.find({"recategorizedTo": "main"}).count()
# A parenthesised single-argument print works on both Python 2 and 3.
print("Number of messages recategorized to main, cummulative:" + str(skimToMain))

mainToSkim = db.recategorization.find({"recategorizedTo": "skim"}).count()
# Fixed: the original printed `maintoSkim` (lower-case "t"), a name that is
# never assigned, so this line raised NameError instead of reporting the count.
print("Number of messages recategorized to skim, cummulative:" + str(mainToSkim))


#Weekly reclassifications
from datetime import date, timedelta
	import requests
	import json
	import csv

	# Goal of this script: retrieve the most-shared NYT articles from the past
	# 30 days and determine which author has contributed the most of them.
	# NOTE(review): the URL carries a literal "[your-key]" placeholder --
	# presumably it must be replaced with a real NYT API key before running.
	pop_request = 'http://api.nytimes.com/svc/mostpopular/v2/mostshared/all-sections/30.json?api-key=[your-key]'
	# Hit the most-shared endpoint and keep the whole response object. Without
	# a timeout, requests waits indefinitely on an unresponsive server, so
	# bound the wait (in seconds) to keep the script from hanging.
	pop = requests.get(pop_request, timeout=30)

	#store just the content
	# modified from http://brianabelson.com/open-news/2013/12/17/scrape-the-gibson.html

	import requests
	from bs4 import BeautifulSoup
	from pprint import pprint
	from urlparse import urljoin
	import dataset
	import csv
	import pandas as pd
	# Main vs skim emails: count the documents in each category.
	# NOTE(review): `db` is not defined in this snippet -- presumably a MongoDB
	# database handle created elsewhere; confirm. If it is PyMongo, note that
	# Cursor.count() is deprecated in newer releases -- TODO verify the version.
	mainboxCount = db.emails.find({"category": "main"}).count()
	# A parenthesised single-argument print behaves the same as a Python 2
	# print statement and is also valid as a Python 3 function call.
	print("Number of emails in mainbox: " + str(mainboxCount))

	skimboxCount = db.emails.find({"category": "skim"}).count()
	print("Number of emails in skimbox:" + str(skimboxCount))

	# Main emails received on a given day of the week: match the day
	# abbreviation ("Mon") against the stored Date header with a regex.
	mainMonday = db.emails.find({"headers.Date": {"$regex": "Mon"}, "category": "main"}).count()
	print("Number of mainbox emails received on Mondays: " + str(mainMonday))
	# Number of reclassifications, cumulative: count recategorization records
	# by their destination category.
	# NOTE(review): `db` is not defined in this snippet -- presumably a MongoDB
	# database handle created elsewhere; confirm.
	skimToMain = db.recategorization.find({"recategorizedTo": "main"}).count()
	# A parenthesised single-argument print works on both Python 2 and 3.
	print("Number of messages recategorized to main, cummulative:" + str(skimToMain))

	mainToSkim = db.recategorization.find({"recategorizedTo": "skim"}).count()
	# Fixed: the original printed `maintoSkim` (lower-case "t"), a name that
	# is never assigned, so this line raised NameError instead of reporting
	# the count.
	print("Number of messages recategorized to skim, cummulative:" + str(mainToSkim))


	#Weekly reclassifications
	from datetime import date, timedelta