John Ramey ramhiser

## stan-dogs.r
# The Dogs data set was analyzed by D.V. Lindley using a loglinear model for binary data
# For details about the Dogs data set and model, see: http://www.openbugs.net/Examples/Dogs.html

library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

num_dogs <- 30
num_trials <- 25
Y <- structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

## brms-nonlinear.r
# The data set and model are described in the *brms* vignette
library(brms)

# Remote CSV (ClarkTriangle.csv) that is read into `loss`
url <- "https://raw.githubusercontent.com/mages/diesunddas/master/Data/ClarkTriangle.csv"
loss <- read.csv(file = url)

# Fix the RNG state so the simulated column below is reproducible
set.seed(seed = 42)

# Add one uniform(0, 1) draw per row as an extra continuous feature
loss[["ramey"]] <- runif(n = nrow(loss))

## random-forest.r
library(randomForest)
library(dplyr)
library(ggplot2)

# Seed the RNG so the fitted forest is reproducible
set.seed(seed = 42)

# Fit a random forest predicting Species from every other column of iris
rf_out <- randomForest(Species ~ ., data = iris)

# Extracts variable importance (Mean Decrease in Gini Index)
# Sorts by variable importance and relevels factors to match ordering

## latlong2fips.r
# FCC's Census Block Conversions API
# http://www.fcc.gov/developers/census-block-conversions-api
#' Look up the county FIPS code for a latitude/longitude pair
#'
#' Queries the FCC Census Block Conversions API and extracts the county FIPS
#' entry from the JSON response.
#'
#' @param latitude A single finite numeric latitude.
#' @param longitude A single finite numeric longitude.
#' @return A character scalar holding the `County$FIPS` field of the response,
#'   or `NA_character_` when the response carries no such value.
latlong2fips <- function(latitude, longitude) {
  # sprintf() recycles vectors and formats NA/Inf verbatim, which would
  # silently build requests the single-response parsing below cannot handle,
  # so insist on finite scalars before touching the network.
  stopifnot(
    is.numeric(latitude), length(latitude) == 1L, is.finite(latitude),
    is.numeric(longitude), length(longitude) == 1L, is.finite(longitude)
  )

  url <- "http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f"
  url <- sprintf(url, latitude, longitude)
  json <- RCurl::getURL(url)
  json <- RJSONIO::fromJSON(json)

  # `[` works whether County was parsed as a named vector or as a list;
  # unlist() collapses a missing/null entry to length zero so it can be
  # reported as NA instead of character(0) or the string "NULL".
  fips <- unlist(json$County["FIPS"], use.names = FALSE)
  if (length(fips) == 0L) {
    return(NA_character_)
  }
  as.character(fips)
}

## character2factor.r
library(dplyr)
# Copy of iris with two character columns: Species converted from factor,
# plus a column of letters a-e drawn at random with replacement.
iris_char <- mutate(
  iris,
  Species = as.character(Species),
  char_column = sample(letters[1:5], size = nrow(iris), replace = TRUE)
)
sum(vapply(iris_char, is.character, logical(1))) # 2

# Turn every character column into a factor; the predicate is applied
# column by column.
iris_factor <- mutate_if(iris_char, is.character, as.factor)
# Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species  char_column
# "numeric"    "numeric"    "numeric"    "numeric"  "character"  "character"

## try_backoff.r
#' Try/catch with exponential backoff
#'
#' Attempts the expression in \code{expr} up to the number of tries specified in
#' \code{max_attempts}. Each time a failure results, the function sleeps for a
#' random amount of time before re-attempting the expression. The upper bound of
#' the backoff increases exponentially after each failure.
#'
#' For details on exponential backoff, see:
#' \url{http://en.wikipedia.org/wiki/Exponential_backoff}
#'

## export_scikit_pipeline.py
import json

def fullname(o):
  """Return the dotted name "<module>.<class name>" of the object ``o``."""
  module_name = o.__module__
  class_name = o.__class__.__name__
  return ".".join((module_name, class_name))

def export_pipeline(scikit_pipeline):
  """JSON export of a scikit-learn pipeline.

  Especially useful when paired with GridSearchCV, TPOT, etc.


## date-range.py
from datetime import datetime, timedelta

def date_range(start, end, step=7, date_format="%m-%d-%Y"):
    """
    Creates generator with a range of dates.
    The dates occur every 7th day (default).

    :param start: the start date of the date range
    :param end: the end date of the date range
    :param step: the step size of the dates

## jaccard.py
import itertools

def jaccard(labels1, labels2):
    """
    Computes the Jaccard similarity between two sets of clustering labels.

    The value returned is between 0 and 1, inclusively. A value of 1 indicates
    perfect agreement between two clustering algorithms, whereas a value of 0
    indicates no agreement. For details on the Jaccard index, see:
    http://en.wikipedia.org/wiki/Jaccard_index

## huber.py
import numpy as np
from statsmodels.robust.scale import huber

# Parameters of the normal distribution the sample is drawn from
mean = 0
std_dev = 2
# Number of variates to draw
sample_size = 25

# Seed NumPy's global RNG so the draw below is reproducible
np.random.seed(42)
x = np.random.normal(loc=mean, scale=std_dev, size=sample_size)

# Appends a couple of outliers
	# The Dogs data set was analyzed by D.V. Lindley using a loglinear model for binary data
	# For details about the Dogs data set and model, see: http://www.openbugs.net/Examples/Dogs.html

	library(rstan)
	rstan_options(auto_write = TRUE)
	options(mc.cores = parallel::detectCores())

	num_dogs <- 30
	num_trials <- 25
	Y <- structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	# The data set and model are described in the brms vignette
	library(brms)

	# Remote CSV (ClarkTriangle.csv) that is read into `loss`
	url <- "https://raw.githubusercontent.com/mages/diesunddas/master/Data/ClarkTriangle.csv"
	loss <- read.csv(file = url)

	# Fix the RNG state so the simulated column below is reproducible
	set.seed(seed = 42)

	# Add one uniform(0, 1) draw per row as an extra continuous feature
	loss[["ramey"]] <- runif(n = nrow(loss))
	library(randomForest)
	library(dplyr)
	library(ggplot2)

	# Seed the RNG so the fitted forest is reproducible
	set.seed(seed = 42)

	# Fit a random forest predicting Species from every other column of iris
	rf_out <- randomForest(Species ~ ., data = iris)

	# Extracts variable importance (Mean Decrease in Gini Index)
	# Sorts by variable importance and relevels factors to match ordering
	# FCC's Census Block Conversions API
	# http://www.fcc.gov/developers/census-block-conversions-api
	#' Look up the county FIPS code for a latitude/longitude pair
	#'
	#' Queries the FCC Census Block Conversions API and extracts the county FIPS
	#' entry from the JSON response.
	#'
	#' @param latitude A single finite numeric latitude.
	#' @param longitude A single finite numeric longitude.
	#' @return A character scalar holding the `County$FIPS` field of the response,
	#'   or `NA_character_` when the response carries no such value.
	latlong2fips <- function(latitude, longitude) {
	  # sprintf() recycles vectors and formats NA/Inf verbatim, which would
	  # silently build requests the single-response parsing below cannot handle,
	  # so insist on finite scalars before touching the network.
	  stopifnot(
	    is.numeric(latitude), length(latitude) == 1L, is.finite(latitude),
	    is.numeric(longitude), length(longitude) == 1L, is.finite(longitude)
	  )

	  url <- "http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f"
	  url <- sprintf(url, latitude, longitude)
	  json <- RCurl::getURL(url)
	  json <- RJSONIO::fromJSON(json)

	  # `[` works whether County was parsed as a named vector or as a list;
	  # unlist() collapses a missing/null entry to length zero so it can be
	  # reported as NA instead of character(0) or the string "NULL".
	  fips <- unlist(json$County["FIPS"], use.names = FALSE)
	  if (length(fips) == 0L) {
	    return(NA_character_)
	  }
	  as.character(fips)
	}
	library(dplyr)
	# Copy of iris with two character columns: Species converted from factor,
	# plus a column of letters a-e drawn at random with replacement.
	iris_char <- mutate(
	  iris,
	  Species = as.character(Species),
	  char_column = sample(letters[1:5], size = nrow(iris), replace = TRUE)
	)
	sum(vapply(iris_char, is.character, logical(1))) # 2

	# Turn every character column into a factor; the predicate is applied
	# column by column.
	iris_factor <- mutate_if(iris_char, is.character, as.factor)
	# Sepal.Length Sepal.Width Petal.Length Petal.Width Species char_column
	# "numeric" "numeric" "numeric" "numeric" "character" "character"
	#' Try/catch with exponential backoff
	#'
	#' Attempts the expression in \code{expr} up to the number of tries specified in
	#' \code{max_attempts}. Each time a failure results, the function sleeps for a
	#' random amount of time before re-attempting the expression. The upper bound of
	#' the backoff increases exponentially after each failure.
	#'
	#' For details on exponential backoff, see:
	#' \url{http://en.wikipedia.org/wiki/Exponential_backoff}
	#'
	import json

	def fullname(o):
	  """Return the dotted name "<module>.<class name>" of the object ``o``."""
	  module_name = o.__module__
	  class_name = o.__class__.__name__
	  return ".".join((module_name, class_name))

	def export_pipeline(scikit_pipeline):
	"""JSON export of a scikit-learn pipeline.

	Especially useful when paired with GridSearchCV, TPOT, etc.
	from datetime import datetime, timedelta

	def date_range(start, end, step=7, date_format="%m-%d-%Y"):
	"""
	Creates generator with a range of dates.
	The dates occur every 7th day (default).

	:param start: the start date of the date range
	:param end: the end date of the date range
	:param step: the step size of the dates
	import itertools

	def jaccard(labels1, labels2):
	"""
	Computes the Jaccard similarity between two sets of clustering labels.

	The value returned is between 0 and 1, inclusively. A value of 1 indicates
	perfect agreement between two clustering algorithms, whereas a value of 0
	indicates no agreement. For details on the Jaccard index, see:
	http://en.wikipedia.org/wiki/Jaccard_index
	import numpy as np
	from statsmodels.robust.scale import huber

	# Parameters of the normal distribution the sample is drawn from
	mean = 0
	std_dev = 2
	# Number of variates to draw
	sample_size = 25

	# Seed NumPy's global RNG so the draw below is reproducible
	np.random.seed(42)
	x = np.random.normal(loc=mean, scale=std_dev, size=sample_size)

	# Appends a couple of outliers