John Ramey ramhiser

## census-regions.r
# For info about census regions, see:
# http://en.wikipedia.org/wiki/List_of_regions_of_the_United_States#Census_Bureau-designated_regions_and_divisions

# Region - Northeast

# Division - New England
new_england <- data.frame(
  region = "Northeast",
  division = "New England",
  state = c("ME", "NH", "VT", "MA", "RI", "CT")

## dataframe_multiindex_columns.py
import numpy as np
import pandas as pd
from itertools import chain, izip, repeat

np.random.seed(42)
num_rows = 10
num_features = 5
num_feature_values = 3

# Builds tuples of features with many values per feature

## filter-small-groups.r
library(dplyr)
group_size <- 20
foo <- iris[1:119, ]
filter(group_by(foo, Species), n() >= group_size)

## updated-mda-code.r
# Comment thread beginning here:
# http://ramhiser.com/blog/2013/07/02/a-brief-look-at-mixture-discriminant-analysis/#comment-1374749931
#
# I'm using version 0.4-4 of the `mda` package
library(mda)
test_data <- read.csv("ts2.csv")
colnames(test_data)[16] <- "filter"
mda_out <- mda(formula=fol_up_u ~ . - filter,
               data=test_data,
               CV=TRUE,

## adjacency-matrix.r
library(clusteval)
adjacency_matrix <- function(cluster_labels, names=NULL, force_symmetric=FALSE) {
  adj_matrix <- diag(length(cluster_labels))
  adj_matrix[lower.tri(adj_matrix)] <- clusteval::comembership(cluster_labels)
  if (force_symmetric) {
    adj_matrix <- adj_matrix + t(adj_matrix)
  }
  diag(adj_matrix) <- 0
  if (!is.null(names))
  rownames(adj_matrix) <- colnames(adj_matrix) <- names

## gompertz.R
# For the standard conjugate beta prior for a binomial likelihood, a typical
# approach is to weight each prior observation equally, there are times where
# the prior Bernoulli trials should be weighted over time, so that the more
# recent trials are weighted near 1 and the oldest trials should be weighted
# near 0.

# Gompertz Function
# http://en.wikipedia.org/wiki/Gompertz_function
gompertz <- function(x, a=1, b=1, c=1) {
  a * exp(-b * exp(-c * x))

## object-sizes.r
library(dplyr)

objects <- ls()
object_sizes <- sapply(objects, function(x) object.size(get(x)))
object_sizes <- data.frame(objects, object_sizes, row.names=NULL)
object_sizes$units_MB <- utils:::format.object_size(object_sizes$object_sizes, units="Mb")
dplyr::arrange(object_sizes, object_sizes)

## cut-pretty.r
#' Cuts a vector into factors with pretty levels
#'
#' @param x numeric vectory
#' @param breaks numeric vector of two ore more unique cut points
#' @param collapse character string to collapse factor labels
#' @param ... arguments passed to \code{\link[base]{cut}}
#' @return A \code{\link{factor}} is returned
#'
#' @examples
#' set.seed(42)

## pandas-fixed.py
import pandas as pd

df = pd.DataFrame([['01-02-2015', 'a', 17],
                  ['01-09-2015', 'a', 42],
                  ['01-30-2015', 'a', 19],
                  ['01-02-2015', 'b', 23],
                  ['01-23-2015', 'b', 1],
                  ['01-30-2015', 'b', 13]])
df.columns = ['date', 'group', 'response']
df.set_index(['date', 'group'], inplace=True)

## schools.r
# SAT scores data from Table 5.2 on page 120 of Gelman's BDA3 text
y <- c(28, 8, -3, 7, -1, 1, 18, 12)
sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)

# Goal: Replicate calculations in Section 5.5
# Instructions for posterior simulation given on page 118
library(itertools2)

# Equation 5.21 on page 117
tau_posterior <- function(tau, y, sigma) {
	# For info about census regions, see:
	# http://en.wikipedia.org/wiki/List_of_regions_of_the_United_States#Census_Bureau-designated_regions_and_divisions

	# Region - Northeast

	# Division - New England
	new_england <- data.frame(
	region = "Northeast",
	division = "New England",
	state = c("ME", "NH", "VT", "MA", "RI", "CT")
	import numpy as np
	import pandas as pd
	from itertools import chain, izip, repeat

	np.random.seed(42)
	num_rows = 10
	num_features = 5
	num_feature_values = 3

	# Builds tuples of features with many values per feature
	library(dplyr)
	group_size <- 20
	foo <- iris[1:119, ]
	filter(group_by(foo, Species), n() >= group_size)
	# Comment thread beginning here:
	# http://ramhiser.com/blog/2013/07/02/a-brief-look-at-mixture-discriminant-analysis/#comment-1374749931
	#
	# I'm using version 0.4-4 of the `mda` package
	library(mda)
	test_data <- read.csv("ts2.csv")
	colnames(test_data)[16] <- "filter"
	mda_out <- mda(formula=fol_up_u ~ . - filter,
	data=test_data,
	CV=TRUE,
	library(clusteval)
	adjacency_matrix <- function(cluster_labels, names=NULL, force_symmetric=FALSE) {
	adj_matrix <- diag(length(cluster_labels))
	adj_matrix[lower.tri(adj_matrix)] <- clusteval::comembership(cluster_labels)
	if (force_symmetric) {
	adj_matrix <- adj_matrix + t(adj_matrix)
	}
	diag(adj_matrix) <- 0
	if (!is.null(names))
	rownames(adj_matrix) <- colnames(adj_matrix) <- names
	# For the standard conjugate beta prior for a binomial likelihood, a typical
	# approach is to weight each prior observation equally, there are times where
	# the prior Bernoulli trials should be weighted over time, so that the more
	# recent trials are weighted near 1 and the oldest trials should be weighted
	# near 0.

	# Gompertz Function
	# http://en.wikipedia.org/wiki/Gompertz_function
	gompertz <- function(x, a=1, b=1, c=1) {
	a * exp(-b * exp(-c * x))
	library(dplyr)

	objects <- ls()
	object_sizes <- sapply(objects, function(x) object.size(get(x)))
	object_sizes <- data.frame(objects, object_sizes, row.names=NULL)
	object_sizes$units_MB <- utils:::format.object_size(object_sizes$object_sizes, units="Mb")
	dplyr::arrange(object_sizes, object_sizes)
	#' Cuts a vector into factors with pretty levels
	#'
	#' @param x numeric vectory
	#' @param breaks numeric vector of two ore more unique cut points
	#' @param collapse character string to collapse factor labels
	#' @param ... arguments passed to \code{\link[base]{cut}}
	#' @return A \code{\link{factor}} is returned
	#'
	#' @examples
	#' set.seed(42)
	import pandas as pd

	df = pd.DataFrame([['01-02-2015', 'a', 17],
	['01-09-2015', 'a', 42],
	['01-30-2015', 'a', 19],
	['01-02-2015', 'b', 23],
	['01-23-2015', 'b', 1],
	['01-30-2015', 'b', 13]])
	df.columns = ['date', 'group', 'response']
	df.set_index(['date', 'group'], inplace=True)
	# SAT scores data from Table 5.2 on page 120 of Gelman's BDA3 text
	y <- c(28, 8, -3, 7, -1, 1, 18, 12)
	sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)

	# Goal: Replicate calculations in Section 5.5
	# Instructions for posterior simulation given on page 118
	library(itertools2)

	# Equation 5.21 on page 117
	tau_posterior <- function(tau, y, sigma) {