Skip to content

Instantly share code, notes, and snippets.

View abelsonlive's full-sized avatar
🕳️
[ o o ]

Brian Abelson abelsonlive

🕳️
[ o o ]
View GitHub Profile
@abelsonlive
abelsonlive / cosine_similarity.R
Last active August 20, 2018 13:38
Cosine Distance Recommendation / Collaborative Filtering Example
# Let's make some dummy data: a binary matrix with n_rows rows and n_cols columns.
n_rows <- 1000
n_cols <- 100
# The zero matrix is only a scaffold for apply(); its values are ignored below.
mat <- matrix(0, nrow=n_rows, ncol=n_cols)
# Replace every column with n_rows independent 0/1 draws (rbinom with size=1, prob=0.1).
mat <- apply(mat, 2, function(x) { return(rbinom(n_rows, size=1, prob=0.1))})
# Name the columns "event1".."event<n_cols>" and the rows "pol1".."pol<n_rows>".
colnames(mat) <- paste0("event", 1:n_cols)
rownames(mat) <- paste0("pol", 1:n_rows)
# Let's take a look at it before we do some math.
head(mat)
@abelsonlive
abelsonlive / gruntification.py
Created September 6, 2013 19:46
run this first, ask questions later
from selenium import webdriver
from random import choice
import time
# Start a Firefox browser session driven by Selenium.
b = webdriver.Firefox()
# Load the NYT tennis-grunts soundboard page.
b.get("http://www.nytimes.com/interactive/2013/09/02/sports/tennis/tennis-grunts-soundboard.html")
# Container element with id 'nytmm', and every <div> nested inside it.
grunt_div = b.find_element_by_id('nytmm')
face_divs = grunt_div.find_elements_by_tag_name('div')
# Candidate values 0.50, 0.51, ..., 1.50.
# NOTE(review): presumably sleep durations in seconds picked with `choice` -- the
# code that consumes `interval` is not visible here; confirm.
interval = [float(s)/100 for s in range(50,151,1)]
@abelsonlive
abelsonlive / homepage.py
Last active December 22, 2015 11:29
homepage scraping
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
def get_image_for_a_link(link):
try:
img = link.find_element_by_tag_name("img")
except NoSuchElementException:
img = None
if img is not None:
is_img = 1
@abelsonlive
abelsonlive / wikimedia_dumps.py
Created August 31, 2013 19:34
get wikipedia pageviews
from thready import threaded
import requests
import gzip
from StringIO import StringIO
import re
from datetime import datetime
def url_to_date(url):
    """Return the timestamp embedded in a dump-file URL as 'YYYY-MM-DD HH:MM:SS'.

    The last path segment of ``url`` must look like
    ``<prefix>-<YYYYMMDD>-<HHMMSS>.<ext>`` (for example
    ``pagecounts-20130831-190000.gz``).  The date and time fields are
    concatenated and parsed with ``%Y%m%d%H%M%S``.

    Raises:
        ValueError: if the file name does not contain a parsable
            date/time pair in the second and third dash-separated fields.
    """
    # Keep only the file name, then drop everything from the first dot on.
    filename = url.split("/")[-1]
    stem = filename.split(".")[0]
    # Fields 1 and 2 (0-based) of the dash-separated stem are date and time.
    stamp = "".join(stem.split("-")[1:3])
    return datetime.strptime(stamp, "%Y%m%d%H%M%S").strftime("%Y-%m-%d %H:%M:%S")
import random
class Markov(object):
def __init__(self, text):
self.cache = {}
self.text = text
self.words = self.text_to_words()
self.word_size = len(self.words)
self.database()
#!/bin/bash
set -o errexit
# Author: David Underhill
# Script to permanently delete files/folders from your git repository. To use
# it, cd to your repository's root and then run the script with a list of paths
# you want to delete, e.g., git-delete-history path1 path2
#
# retrieved from: http://dound.com/2009/04/git-forever-remove-files-or-folders-from-history/
#
@abelsonlive
abelsonlive / treasury_munging.R
Created July 12, 2013 06:51
munge treasury.io data for table_ii into data-sciencey format.
# load in libraries
library("lubridate")
library("plyr")
library("RColorBrewer")
# where is your data located? let's switch to that dir!
PATH_TO_DATA_DIR <- "~/Dropbox/code/federal-treasury-api/data/lifetime_csv"
setwd(PATH_TO_DATA_DIR)
# read in csv. make sure to have stringsAsFactors=FALSE
@abelsonlive
abelsonlive / gist:5893375
Created June 30, 2013 01:19
scraperwiki api
#!/usr/bin/env python2
# Derived from scraperwiki/dumptruck-web, MIT license
import os
import json
import sqlite3
import dumptruck
from bottle import route, run, response, static_file
class QueryError(Exception):
import pandas
import requests
import re
import urllib
import itertools
import uuid
import json
from urlparse import urljoin
from BeautifulSoup import BeautifulSoup
from datetime import datetime
import re
import pandas as pd
import boto.s3
from boto.s3.key import Key
import sys
import os
from selenium import webdriver
from contextlib import closing
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException