Brian Abelson abelsonlive

## cosine_similarity.R
# Let's make some dummy data: a binary matrix in which every cell is an
# independent draw from rbinom(size=1, prob=0.1).
n_rows <- 1000
n_cols <- 100
# Draw one full column per iteration, so the matrix is filled column by column.
mat <- sapply(seq_len(n_cols), function(j) rbinom(n_rows, size=1, prob=0.1))
# Rows are labelled pol1..pol<n_rows>, columns event1..event<n_cols>.
dimnames(mat) <- list(paste0("pol", seq_len(n_rows)),
                      paste0("event", seq_len(n_cols)))

# Let's take a look at it before we do some math.
head(mat)

## gruntification.py
from selenium import webdriver
from random import choice
import time

# Start a Firefox session driven by Selenium and load the NYT
# tennis-grunts soundboard page.
b = webdriver.Firefox()
b.get("http://www.nytimes.com/interactive/2013/09/02/sports/tennis/tennis-grunts-soundboard.html")
# Locate the element with id 'nytmm', then collect every <div> nested in it.
grunt_div = b.find_element_by_id('nytmm')
# NOTE(review): the name suggests one div per player face -- confirm against
# the page markup; this grabs *all* descendant divs.
face_divs = grunt_div.find_elements_by_tag_name('div')

# 101 float values from 0.50 to 1.50 inclusive, in steps of 0.01.
# NOTE(review): presumably pause lengths in seconds to be picked with
# `choice` -- the code that uses this list is not visible here; confirm.
interval = [float(s)/100 for s in range(50,151,1)]

## homepage.py
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

def get_image_for_a_link(link):
    try:
        img = link.find_element_by_tag_name("img")
    except NoSuchElementException:
        img = None
    if img is not None:
        is_img = 1

## wikimedia_dumps.py
from thready import threaded
import requests
import gzip
from StringIO import StringIO
import re
from datetime import datetime

def url_to_date(url):
  """Return the timestamp embedded in a dump URL as "YYYY-MM-DD HH:MM:SS".

  The last path segment of `url` is taken, everything from its first "."
  onward is dropped, and the remainder is split on "-".  The second and
  third dash-separated fields are concatenated and parsed as
  "%Y%m%d%H%M%S" (e.g. "name-20130801-010000.gz" -> "2013-08-01 01:00:00").

  Raises ValueError (from datetime.strptime) when those fields do not form
  a valid timestamp.
  """
  file_name = url.split("/")[-1]
  stem = file_name.split(".")[0]
  fields = stem.split("-")
  # fields[0] is the name prefix; fields[1] and fields[2] carry date and time.
  stamp = "".join(fields[1:3])
  parsed = datetime.strptime(stamp, "%Y%m%d%H%M%S")
  return parsed.strftime("%Y-%m-%d %H:%M:%S")

## gist:6242472
import random

class Markov(object):

  def __init__(self, text):
    self.cache = {}
    self.text = text
    self.words = self.text_to_words()
    self.word_size = len(self.words)
    self.database()

## git-remove-history.sh
#!/bin/bash
set -o errexit

# Author: David Underhill
# Script to permanently delete files/folders from your git repository.  To use
# it, cd to your repository's root and then run the script with a list of paths
# you want to delete, e.g., git-delete-history path1 path2
#
#  retrieved from: http://dound.com/2009/04/git-forever-remove-files-or-folders-from-history/
#

## treasury_munging.R
# load in libraries
library("lubridate")
library("plyr")
library("RColorBrewer")

# Where is your data located? Let's switch to that dir!
PATH_TO_DATA_DIR <- "~/Dropbox/code/federal-treasury-api/data/lifetime_csv"
setwd(PATH_TO_DATA_DIR)

# read in csv. make sure to have stringsAsFactors=FALSE

## gist:5893375
#!/usr/bin/env python2
# Derived from scraperwiki/dumptruck-web, MIT license

import os
import json
import sqlite3
import dumptruck
from bottle import route, run, response, static_file

class QueryError(Exception):

## get_articles.py
import pandas
import requests
import re
import urllib
import itertools
import uuid
import json
from urlparse import urljoin
from BeautifulSoup import BeautifulSoup
from datetime import datetime

## nyt_homepage_tracker.py
import re
import pandas as pd
import boto.s3
from boto.s3.key import Key
import sys
import os
from selenium import webdriver
from contextlib import closing
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
	# lets make some dummy data
	n_rows <- 1000
	n_cols <- 100
	mat <- matrix(0, nrow=n_rows, ncol=n_cols)
	mat <- apply(mat, 2, function(x) { return(rbinom(n_rows, size=1, prob=0.1))})
	colnames(mat) <- paste0("event", 1:n_cols)
	rownames(mat) <- paste0("pol", 1:n_rows)

	# lets take a look at it before we do some math
	head(mat)
	from selenium import webdriver
	from random import choice
	import time

	b = webdriver.Firefox()
	b.get("http://www.nytimes.com/interactive/2013/09/02/sports/tennis/tennis-grunts-soundboard.html")
	grunt_div = b.find_element_by_id('nytmm')
	face_divs = grunt_div.find_elements_by_tag_name('div')

	interval = [float(s)/100 for s in range(50,151,1)]
	from selenium import webdriver
	from selenium.common.exceptions import NoSuchElementException

	def get_image_for_a_link(link):
	try:
	img = link.find_element_by_tag_name("img")
	except NoSuchElementException:
	img = None
	if img is not None:
	is_img = 1
	from thready import threaded
	import requests
	import gzip
	from StringIO import StringIO
	import re
	from datetime import datetime

	def url_to_date(url):
	d = "".join(url.split("/")[-1].split(".")[0].split("-")[1:3])
	return datetime.strptime(d, "%Y%m%d%H%M%S").strftime("%Y-%m-%d %H:%M:%S")
	import random

	class Markov(object):

	def __init__(self, text):
	self.cache = {}
	self.text = text
	self.words = self.text_to_words()
	self.word_size = len(self.words)
	self.database()
	# load in libraries
	library("lubridate")
	library("plyr")
	library("RColorBrewer")

	# where is your data located, lets switch to that dir!
	PATH_TO_DATA_DIR <- "~/Dropbox/code/federal-treasury-api/data/lifetime_csv"
	setwd(PATH_TO_DATA_DIR)

	# read in csv. make sure to have stringsAsFactors=FALSE
	#!/usr/bin/env python2
	# Derived from scraperwiki/dumptruck-web, MIT license

	import os
	import json
	import sqlite3
	import dumptruck
	from bottle import route, run, response, static_file

	class QueryError(Exception):
	import pandas
	import requests
	import re
	import urllib
	import itertools
	import uuid
	import json
	from urlparse import urljoin
	from BeautifulSoup import BeautifulSoup
	from datetime import datetime
	import re
	import pandas as pd
	import boto.s3
	from boto.s3.key import Key
	import sys
	import os
	from selenium import webdriver
	from contextlib import closing
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.common.exceptions import NoSuchElementException