alex hayes alexpghayes

## glmnet_factor_ex.r
library(ggplot2)
library(glmnet)

# Build the design matrix: model.matrix() expands every factor() term into
# dummy columns, and `wt * factor(gear)` into main effects plus interaction.
x <- model.matrix(
  object = mpg ~ factor(cyl) + factor(gear) + wt * factor(gear),
  data = mtcars
)

# Response vector.
y <- mtcars[["mpg"]]

# Show the expanded design matrix.
x

# Cross-validated fit: alpha = 1 is the lasso penalty, alpha = 0 is ridge.
fit <- cv.glmnet(x = x, y = y, alpha = 1)

## marc_bot.py
import numpy as np

# Sample sentences. The enclosing brackets already let the literal span
# several lines, so no backslash continuations are needed.
phrases = [
    "I don't like Guerra",
    "Roberto is excellent. I like Roberto.",
    "Dobelman is a great teacher! How could you not like Dobelman!",
    "I don't like Devika. She's not a very good teacher.",
    "Noon is too early to go to class.",
    "I like Ms. Poon :)",
    "!",
]

## gather_mutate_spread.r
I have categorical data spread across multiple columns that I would like to aggregate.

    library(tidyverse)

    data <- data_frame(var1 = sample(LETTERS[1:2], 50, replace = TRUE), # categorical A/B
                       var2 = sample(LETTERS[1:2], 50, replace = TRUE),
                       var3 = sample(LETTERS[1:2], 50, replace = TRUE),
                       var4 = sample(LETTERS[3:4], 50, replace = TRUE), # categorical C/D
                       var5 = sample(LETTERS[3:4], 50, replace = TRUE),
                       var6 = sample(LETTERS[3:4], 50, replace = TRUE)) %>%

## ttt_simulated_distribution.R
library(tidyverse)

get_ts <- function(group_size, ttt_p_inf, ctrl_p_inf) {
  a <- rbinom(1, size = group_size, prob = ttt_p_inf) # a
  c <- rbinom(1, size= group_size, prob = ctrl_p_inf) # c

  b <- group_size - a
  d <- group_size - c

  ((a - c) / group_size) / sqrt((a * b + c * d) / group_size ^ 3)

## merge_chr.R
# Example input: one row per ID with three character columns (d1-d3) that
# contain missing values; row 5 is missing in every d* column.
df <- tribble(
  ~ID, ~d1,           ~d2,           ~d3,
  1,   "G",           "G",           "C",
  2,   NA_character_, "G",           "T",
  3,   "A",           NA_character_, "G",
  4,   "G",           "A",           "A",
  5,   NA_character_, NA_character_, NA_character_,
  6,   "G",           "G",           "G"
)

merge_chr <- function(df, col, ..., fun, remove = TRUE) {

## gather_spread_saves_the_day.r
library(tidyverse)

data_frame(id = 1:50,
  rel1 = sample(LETTERS[1:4], 50, replace = TRUE),
  gender1 = sample(c("M", "F", "O"), 50, replace = TRUE),
  score1 = rnorm(50),
  rel2 = sample(LETTERS[1:4], 50, replace = TRUE),
  gender2 = sample(c("M", "F", "O"), 50, replace = TRUE),
  score2 = rnorm(50)) %>%
  gather(field, value, -id) %>%

## exprs_for_mutate.R
library(tidyverse)
library(rlang)

# Multiply `x` by the constant `c`.
f <- function(x, c) c * x
col_names <- c("am", "gear", "carb")

# Build one quoted call per column, e.g. f(am, 3), and carry the new column
# names ("c_am", ...) as the *names* of the list: `!!!` splices a named list
# into `mutate()` as `name = expression` pairs.
#
# Writing `quo(!!name := value)` does not work. `:=` is only interpreted at
# the top level of functions that collect dynamic dots (`mutate()`, `quos()`,
# `list2()`); `quo()` captures a single expression, so the `:=` call is kept
# as an ordinary quoted call and `mutate()` rejects it with
# "Column ... is of unsupported type quoted call".
exprs <- purrr::map(
  rlang::set_names(col_names, paste0("c_", col_names)),
  ~ quo(f(!!sym(.x), 3))
)

# Adds the columns c_am, c_gear and c_carb to mtcars.
mutate(mtcars, !!!exprs)

## broadcast.Rmd
A starter example of broadcasting is centering a matrix, i.e. subtracting each column's mean from every entry in that column:

```{python}
import numpy as np

X = np.random.normal(size=(3, 3))  # random 3 by 3 matrix

mu = X.mean(axis = 0)    # array (vector) of column means

print("Original matrix")

## call_python_from_r.R
library(reticulate)
library(tidyverse)

# 2 x 2 numeric matrix filled row by row: first row (1, 2), second row (3, 4).
X <- matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)

y <- cbind(
  c(3, 5)

## relational_algebra_ex.R
library(tidyverse)

# Value pools that the sample tables below draw from.
beans <- c(
  "Caturra",
  "Grusti",
  "Double roasted"
)
coffees <- c(
  "Garuda",
  "Blend 101",
  "Blend 201",
  "Exxxtra special blend"
)
# NOTE(review): "Columbia" is probably meant to be the country "Colombia";
# left unchanged because later code may match on this exact string.
location <- c(
  "Rwanda",
  "Columbia",
  "Peru"
)
people <- c(
  "Dan Wallach",
  "Chris Jermaine",
  "Scott Rikner",
  "Luay"
)

has_bean <- tibble(
  coffee = sample(coffees, 10, replace = TRUE),
  bean_name = sample(beans, 10, replace = TRUE)
	library(ggplot2)
	library(glmnet)

	x <- model.matrix(mpg ~ factor(cyl) + factor(gear) + wt * factor(gear), mtcars)
	y <- mtcars$mpg

	x

	fit <- cv.glmnet(x, y, alpha = 1) # lasso when alpha = 1, ridge when alpha = 0
	import numpy as np

	phrases = [ \
	"I don't like Guerra", \
	"Roberto is excellent. I like Roberto.", \
	"Dobelman is a great teacher! How could you not like Dobelman!", \
	"I don't like Devika. She's not a very good teacher.", \
	"Noon is too early to go to class.", \
	"I like Ms. Poon :)", \
	"!"]
	I have categorical data spread across multiple columns that I would like to aggregate.

	library(tidyverse)

	data <- data_frame(var1 = sample(LETTERS[1:2], 50, replace = TRUE), # categorical A/B
	var2 = sample(LETTERS[1:2], 50, replace = TRUE),
	var3 = sample(LETTERS[1:2], 50, replace = TRUE),
	var4 = sample(LETTERS[3:4], 50, replace = TRUE), # categorical C/D
	var5 = sample(LETTERS[3:4], 50, replace = TRUE),
	var6 = sample(LETTERS[3:4], 50, replace = TRUE)) %>%
	library(tidyverse)

	get_ts <- function(group_size, ttt_p_inf, ctrl_p_inf) {
	a <- rbinom(1, size = group_size, prob = ttt_p_inf) # a
	c <- rbinom(1, size= group_size, prob = ctrl_p_inf) # c

	b <- group_size - a
	d <- group_size - c

	((a - c) / group_size) / sqrt((a * b + c * d) / group_size ^ 3)
	df <- tribble(
	~ID, ~d1, ~d2, ~d3,
	1, "G", "G", "C",
	2, NA, "G", "T",
	3, "A", NA, "G",
	4, "G", "A", "A",
	5, NA, NA, NA,
	6, "G", "G", "G")

	merge_chr <- function(df, col, ..., fun, remove = TRUE) {
	library(tidyverse)

	data_frame(id = 1:50,
	rel1 = sample(LETTERS[1:4], 50, replace = TRUE),
	gender1 = sample(c("M", "F", "O"), 50, replace = TRUE),
	score1 = rnorm(50),
	rel2 = sample(LETTERS[1:4], 50, replace = TRUE),
	gender2 = sample(c("M", "F", "O"), 50, replace = TRUE),
	score2 = rnorm(50)) %>%
	gather(field, value, -id) %>%
	library(tidyverse)
	library(rlang)

	f <- function(x, c) c * x
	col_names <- c("am", "gear", "carb")

	exprs <- purrr::map(col_names, ~quo(!!paste0("c_", .x) := f(!!sym(.x), 3)))
	mutate(mtcars, !!!exprs)

	# Error in mutate_impl(.data, dots) : Column ``:=`("c_am", f(am, 3))` is of unsupported type quoted call
	A starter example is centering a matrix

	```{python}
	import numpy as np

	X = np.random.normal(size=(3, 3)) # random 3 by 3 matrix

	mu = X.mean(axis = 0) # array (vector) of column means

	print("Original matrix")
	library(reticulate)
	library(tidyverse)

	X <- rbind(
	c(1, 2),
	c(3, 4)
	)

	y <- cbind(
	c(3, 5)
	library(tidyverse)

	beans <- c("Caturra", "Grusti", "Double roasted")
	coffees <- c("Garuda", "Blend 101", "Blend 201", "Exxxtra special blend")
	location <- c("Rwanda", "Columbia", "Peru")
	people <- c("Dan Wallach", "Chris Jermaine", "Scott Rikner", "Luay")

	has_bean <- tibble(
	coffee = sample(coffees, 10, replace = TRUE),
	bean_name = sample(beans, 10, replace = TRUE)