Skip to content

Instantly share code, notes, and snippets.

@gitabites
gitabites / nyt_mostpop.py
Created January 18, 2016 18:43
NYT Most Shared Articles
import requests
import json
import csv
# Goal: retrieve the articles most shared from the NYT within the past 30 days
# and determine which author has contributed the most of them.
# First, build the URL for the most-shared endpoint (all sections, 30-day
# window). The "[your-key]" placeholder in the query string must be replaced
# with a real API key before this will work.
pop_request = 'http://api.nytimes.com/svc/mostpopular/v2/mostshared/all-sections/30.json?api-key=[your-key]'
# Issue the GET request and keep the full response object for later steps.
pop = requests.get(pop_request)
#store just the content
@gitabites
gitabites / perac_2.py
Created June 3, 2014 13:11
Scraping PERAC for RFPS
# modified from http://brianabelson.com/open-news/2013/12/17/scrape-the-gibson.html
import requests
from bs4 import BeautifulSoup
from pprint import pprint
from urlparse import urljoin
import dataset
import csv
import pandas as pd
@gitabites
gitabites / Main_Skim_Date_Time
Last active December 28, 2015 03:59
Main vs Skim emails over date/time updated
# Main vs. skim emails: report how many stored emails fall in each category.
# `db` is expected to be a database handle created earlier in the script.
# NOTE: single-argument print(...) produces identical output under Python 2
# (which this script targets) and also parses under Python 3.

# Total emails categorized as "main".
mainboxCount = db.emails.find(
    {"category": "main"}
).count()
print("Number of emails in mainbox: " + str(mainboxCount))

# Total emails categorized as "skim".
skimboxCount = db.emails.find(
    {"category": "skim"}
).count()
print("Number of emails in skimbox:" + str(skimboxCount))

# Main emails received on a given day of the week: the day is matched by a
# regex against the message's Date header (here "Mon" for Mondays).
mainMonday = db.emails.find(
    {
        "headers.Date": {"$regex": "Mon"},
        "category": "main",
    }
).count()
print("Number of mainbox emails received on Mondays: " + str(mainMonday))
@gitabites
gitabites / Reclassification queries
Last active December 27, 2015 00:49
Reclassification queries, updated 11/21/13 with weekly/biweekly options
# Number of reclassifications, cumulative (over everything stored in the
# recategorization collection). `db` is expected to be a database handle
# created earlier in the script.
# NOTE: single-argument print(...) produces identical output under Python 2
# (which this script targets) and also parses under Python 3.

# Messages that were moved into "main".
skimToMain = db.recategorization.find({"recategorizedTo": "main"}).count()
print("Number of messages recategorized to main, cummulative:" + str(skimToMain))

# Messages that were moved into "skim".
mainToSkim = db.recategorization.find({"recategorizedTo": "skim"}).count()
# Fixed: the original printed `maintoSkim` (lower-case "t"), a name that is
# never defined, so this line raised NameError instead of reporting the count.
print("Number of messages recategorized to skim, cummulative:" + str(mainToSkim))
#Weekly reclassifications
from datetime import date, timedelta