John Ramey ramhiser

## boundieboxes.py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatch
import matplotlib.cm as cm
import cv2
import csv
from sklearn import cluster


def find_points(gray_img, color_img, num_points):

## exercise-4.7.r
library(dplyr)
library(BHH2)

df <- expand.grid(drivers=c('I', 'II', 'III', 'IV'),
                  cars=1:4)
df <- rbind(df, df) %>% arrange(drivers, cars)
df$treatment <- c(
  rep(c('A', 'B', 'D', 'C'), each=2),
  rep(c('D', 'C', 'A', 'B'), each=2),
  rep(c('B', 'D', 'C', 'A'), each=2),

## leaflet-county-explorer.r
# TODO: Add a Shiny dropdown to select demographic variable
library(leaflet)
library(noncensus)
library(dplyr)

data("counties", package="noncensus")
data("county_polygons", package="noncensus")
data("quick_facts", package="noncensus")

counties <- counties %>%

## add-na-rows.r
# Useful for drawing polygons with leaflet
# Polygons are stored in a `tbl_df` object with a mandatory `NA` row between each
# polygon so that `leaflet` knows to stop drawing between each polygon.
# Rather than magic, I found a slick way to do this via `dplyr::arrange`
# See: (http://stackoverflow.com/a/25267681/234233).

# Example using Iris data set:
df_na <- matrix(NA, nrow=nlevels(iris$Species), ncol=ncol(iris) - 1)
df_na <- tbl_df(as.data.frame(df_na))
colnames(df_na) <- setdiff(colnames(iris), "Species")

## austin-thd-stores.html

<!DOCTYPE html>
<html>
<head>
	<title>Leaflet Example -- Home Depot Stores</title>
	<meta charset="utf-8" />

	<meta name="viewport" content="width=device-width, initial-scale=1.0">

	 <link rel="stylesheet" href="http://cdn.leafletjs.com/leaflet-0.7.3/leaflet.css" />

## impute-naive.r
#' Naive imputation of missing data
#'
#' Imputes missing data in a data frame one column at a time, i.e., univariately.
#' Missing numeric values are replaced with the median. Similarly, missing
#' factor values are replaced with the mode.
#'
#' If \code{draw} is set to \code{TRUE}, missing data are drawn from a basic
#' distribution to make the imputation slightly less naive. For continuous,
#' values are drawn from a uniform distribution ranging from the min to max
#' values observed within the column. For categorical, values are drawn from a

## schools.r
# SAT scores data from Table 5.2 on page 120 of Gelman's BDA3 text
y <- c(28, 8, -3, 7, -1, 1, 18, 12)
sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)

# Goal: Replicate calculations in Section 5.5
# Instructions for posterior simulation given on page 118
library(itertools2)

# Equation 5.21 on page 117
tau_posterior <- function(tau, y, sigma) {

## one-hot.py
import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer

def encode_onehot(df, cols):
    """
    One-hot encoding is applied to columns specified in a pandas DataFrame.

    Modified from: https://gist.github.com/kljensen/5452382


## fill-product.py
# Goal: Fill missing date/group pairs with fill_value using Cartesian product of indices
import pandas as pd

def fill_product(df, index, fill_value=0):
    """
    Fills a DataFrame with the Cartesian product of the given indices.

    See: http://stackoverflow.com/a/16994910/234233

    Example:

## date-range.py
from datetime import datetime, timedelta

def date_range(start, end, step=7, date_format="%m-%d-%Y"):
    """
    Creates generator with a range of dates.
    The dates occur every 7th day (default).

    :param start: the start date of the date range
    :param end: the end date of the date range
    :param step: the step size of the dates
	import numpy as np
	import matplotlib.pyplot as plt
	import matplotlib.patches as mpatch
	import matplotlib.cm as cm
	import cv2
	import csv
	from sklearn import cluster


	def find_points(gray_img, color_img, num_points):
	library(dplyr)
	library(BHH2)

	df <- expand.grid(drivers=c('I', 'II', 'III', 'IV'),
	cars=1:4)
	df <- rbind(df, df) %>% arrange(drivers, cars)
	df$treatment <- c(
	rep(c('A', 'B', 'D', 'C'), each=2),
	rep(c('D', 'C', 'A', 'B'), each=2),
	rep(c('B', 'D', 'C', 'A'), each=2),
	# TODO: Add a Shiny dropdown to select demographic variable
	library(leaflet)
	library(noncensus)
	library(dplyr)

	data("counties", package="noncensus")
	data("county_polygons", package="noncensus")
	data("quick_facts", package="noncensus")

	counties <- counties %>%
	# Useful for drawing polygons with leaflet
	# Polygons are stored in a `tbl_df` object with a mandatory `NA` row between each
	# polygon so that `leaflet` knows to stop drawing between each polygon.
	# Rather than magic, I found a slick way to do this via `dplyr::arrange`
	# See: (http://stackoverflow.com/a/25267681/234233).

	# Example using Iris data set:
	df_na <- matrix(NA, nrow=nlevels(iris$Species), ncol=ncol(iris) - 1)
	df_na <- tbl_df(as.data.frame(df_na))
	colnames(df_na) <- setdiff(colnames(iris), "Species")

	<!DOCTYPE html>
	<html>
	<head>
	<title>Leaflet Example -- Home Depot Stores</title>
	<meta charset="utf-8" />

	<meta name="viewport" content="width=device-width, initial-scale=1.0">

	<link rel="stylesheet" href="http://cdn.leafletjs.com/leaflet-0.7.3/leaflet.css" />
	#' Naive imputation of missing data
	#'
	#' Imputes missing data in a data frame one column at a time, i.e., univariately.
	#' Missing numeric values are replaced with the median. Similarly, missing
	#' factor values are replaced with the mode.
	#'
	#' If \code{draw} is set to \code{TRUE}, missing data are drawn from a basic
	#' distribution to make the imputation slightly less naive. For continuous,
	#' values are drawn from a uniform distribution ranging from the min to max
	#' values observed within the column. For categorical, values are drawn from a
	# SAT scores data from Table 5.2 on page 120 of Gelman's BDA3 text
	y <- c(28, 8, -3, 7, -1, 1, 18, 12)
	sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)

	# Goal: Replicate calculations in Section 5.5
	# Instructions for posterior simulation given on page 118
	library(itertools2)

	# Equation 5.21 on page 117
	tau_posterior <- function(tau, y, sigma) {
	import pandas as pd
	import numpy as np
	from sklearn.feature_extraction import DictVectorizer

	def encode_onehot(df, cols):
	"""
	One-hot encoding is applied to columns specified in a pandas DataFrame.

	Modified from: https://gist.github.com/kljensen/5452382
	# Goal: Fill missing date/group pairs with fill_value using Cartesian product of indices
	import pandas as pd

	def fill_product(df, index, fill_value=0):
	"""
	Fills a DataFrame with the Cartesian product of the given indices.

	See: http://stackoverflow.com/a/16994910/234233

	Example:
	from datetime import datetime, timedelta

	def date_range(start, end, step=7, date_format="%m-%d-%Y"):
	"""
	Creates generator with a range of dates.
	The dates occur every 7th day (default).

	:param start: the start date of the date range
	:param end: the end date of the date range
	:param step: the step size of the dates