Josef Fruehwald JoFrhwld

## many_files_recipe.R
#' I often have individual speaker data files in a nested directory structure.
#' But I also often want to read all speakers' data into R in one big data frame.
#' Here's my current best recipe.

library(tidyverse)

#' glob for the file list. This is dependent on good directory naming practices
all_files <- Sys.glob("path/speakerid*/*.csv")

df <- data_frame(file = all_files) %>%               # make a column of all of the file paths

## terror.R
#' rvest for scraping 538
library(rvest)
library(magrittr)

#' scrape the forecast
five38 <- read_html("http://projects.fivethirtyeight.com/2016-election-forecast/?ex_cid=rrpromo#plus")

#' I'd prefer to be using the polls-plus forecast here, but
#' I can only seem to get the polls-only one
clinton <- five38 %>%

## theKingOfFrance.py
# There is no King of France, so the name is bound to None.
bald = ["JeanLucPicard", "StoneColdSteveAustin"]
theKingOfFrance = None

print("Jean: The King of France is bald.")

# None is not a member of `bald`, so the membership test is False and the
# second branch is the one that prints.
king_is_bald = theKingOfFrance in bald
if king_is_bald:
    print("Jaques: It's true!")
elif not king_is_bald:
    print("Jaques: It's false!")

print("")

## ses_college.R
# One row per SES level. Applied/Admitted/Enrolled are presumably the share of
# each group reaching that stage (TODO: confirm against the data source).
# Assigned with `<-` so the target name is visible at the start of the
# expression rather than trailing it.
data <- data.frame(
  SES = c("High", "Middle", "Low"),
  Applied = c(0.79, 0.59, 0.5),
  Admitted = c(0.67, 0.47, 0.42),
  Enrolled = c(0.53, 0.28, 0.32)
)

ggplot(data, aes(SES)) +
	geom_point(aes(y = Applied, color = "Applied"))+
	geom_line(aes(y = Applied, group = 1, color = "Applied"))+
	geom_point(aes(y = Admitted/Applied, color = "Applied and Admitted"))+
	geom_line(aes(group = 1, y = Admitted/Applied, color = "Applied and Admitted"))+

## multi_match.R
multi_match <- function(x, table){
    # returns initial indicies of all substrings in table which match x
    if(length(table) < length(x)){
		return(NA)
	}else{
		check_mat <- matrix(nrow = length(x), ncol = length(table))
		for(i in 1:length(x)){
			check_mat[i,] <- table %in% x[i]
		}
		out <- vector(length = length(table))

## syllabify.R
syllabify <- function(trans){
    require(stringr)

    segments <- unlist(str_split(trans, " "))
	nuclei_string <- "A|E|I|O|U|@"
	nucs <- grep(nuclei_string, segments)
	n <- length(nucs)

	r_colored <- nucs[grep("R", segments[nucs])]

## equality.R
library(ggplot2)
library(grid)

# Background: a single rectangle spanning 0-180 on both axes, with its fill
# colour.
bg_col <- rgb(red = 0.8, green = 0, blue = 0)
bg <- data.frame(xmin = 0, xmax = 180, ymin = 0, ymax = 180)

# Two rectangles sharing the same x-extent (35-145) at different heights,
# labelled "first" and "second", with their shared fill colour.
bars_col <- rgb(red = 0.9, green = 0.56, blue = 0.56)
bars <- data.frame(
  xmin = rep(35, 2),
  xmax = rep(145, 2),
  ymin = c(45, 100),
  ymax = c(80, 135),
  groups = c("first", "second")
)


## cohen_d_height.R
# http://www.cdc.gov/nchs/data/nhsr/nhsr010.pdf

# Summary values for the first group (`_fem`); presumably sample size, mean
# height and standard error from the report linked above (TODO: confirm the
# table and units).
n_fem <- 604
h_fem <- 162.2
se_fem <- 0.34
# Back out the standard deviation from the standard error: sd = se * sqrt(n).
sd_fem <- se_fem * sqrt(n_fem)

# The same summary values for the second group (`_mal`).
n_mal <- 591
h_mal <- 176.6
se_mal <- 0.38

## Rmd_example.rmd
#### Values created by statistics
Statistical layers added to plots actually create new pieces of data, like the y-coordinates of the smoother. Some statistical layers create a few different values, and you can choose which one you want to plot. For example, here is a density plot, where the kernel density estimate is represented by a colored line.

```{r tidy = FALSE, fig.width = 8/1.2, fig.height=5/1.2}
# Density of Dur_msec, drawn as one colored line per Word.
ggplot(I_jean, aes(Dur_msec, color = Word))+
  geom_density()
```

You have to understand the densities represented in this plot as being conditional on selecting a specific word. That is, given that we have decided to think about the lexical item "I've", what is the probability it will be found in a specific range of durations?

## zero_crossings.R
#' Find zero crossings in an fd object
#'
#' @import fda
#' @import magrittr
#'
#' @param fd an fd object
#' @param Lfdobj the derivative (0, 1, 2)
#' @param slope The slope of interest at the zero crossing
#' @param eps The prediction granularity
#' @param min Localize the zero crossing search to be greater than min
	#' I often have individual speaker data files in a nested directory structure.
	#' But I also often want to read all speakers' data into R in one big data frame.
	#' Here's my current best recipe.

	library(tidyverse)

	#' glob for the file list. This is dependent on good directory naming practices
	# One wildcard matches each speaker directory, the other each csv file in it.
	# Without the wildcards the pattern is a literal path and matches nothing useful.
	all_files <- Sys.glob("path/speakerid*/*.csv")

	df <- data_frame(file = all_files) %>% # make a column of all of the file paths
	#' rvest for scraping 538
	library(rvest)
	library(magrittr)

	#' scrape the forecast
	five38 <- read_html("http://projects.fivethirtyeight.com/2016-election-forecast/?ex_cid=rrpromo#plus")

	#' I'd prefer to be using the polls-plus forecast here, but
	#' I can only seem to get the polls-only one
	clinton <- five38 %>%
	# There is no King of France, so the name is bound to None.
	theKingOfFrance = None
	bald = ["JeanLucPicard", "StoneColdSteveAustin"]

	print("Jean: The King of France is bald.")
	# None is not a member of `bald`, so the elif branch is the one that prints.
	# The branch bodies must be indented under their conditions to parse.
	if theKingOfFrance in bald:
		print("Jaques: It's true!")
	elif theKingOfFrance not in bald:
		print("Jaques: It's false!")

	print("")
	# One row per SES level. Applied/Admitted/Enrolled are presumably the share of
	# each group reaching that stage (TODO: confirm against the data source).
	# Assigned with `<-` so the target name is visible at the start of the
	# expression rather than trailing it.
	data <- data.frame(
	  SES = c("High", "Middle", "Low"),
	  Applied = c(0.79, 0.59, 0.5),
	  Admitted = c(0.67, 0.47, 0.42),
	  Enrolled = c(0.53, 0.28, 0.32)
	)

	ggplot(data, aes(SES)) +
	geom_point(aes(y = Applied, color = "Applied"))+
	geom_line(aes(y = Applied, group = 1, color = "Applied"))+
	geom_point(aes(y = Admitted/Applied, color = "Applied and Admitted"))+
	geom_line(aes(group = 1, y = Admitted/Applied, color = "Applied and Admitted"))+
	multi_match <- function(x, table){
	# returns initial indicies of all substrings in table which match x
	if(length(table) < length(x)){
	return(NA)
	}else{
	check_mat <- matrix(nrow = length(x), ncol = length(table))
	for(i in 1:length(x)){
	check_mat[i,] <- table %in% x[i]
	}
	out <- vector(length = length(table))
	syllabify <- function(trans){
	require(stringr)

	segments <- unlist(str_split(trans, " "))
	nuclei_string <- "A\|E\|I\|O\|U\|@"
	nucs <- grep(nuclei_string, segments)
	n <- length(nucs)

	r_colored <- nucs[grep("R", segments[nucs])]
	library(ggplot2)
	library(grid)

	# Background: a single rectangle spanning 0-180 on both axes, with its fill
	# colour.
	bg_col <- rgb(red = 0.8, green = 0, blue = 0)
	bg <- data.frame(xmin = 0, xmax = 180, ymin = 0, ymax = 180)

	# Two rectangles sharing the same x-extent (35-145) at different heights,
	# labelled "first" and "second", with their shared fill colour.
	bars_col <- rgb(red = 0.9, green = 0.56, blue = 0.56)
	bars <- data.frame(
	  xmin = rep(35, 2),
	  xmax = rep(145, 2),
	  ymin = c(45, 100),
	  ymax = c(80, 135),
	  groups = c("first", "second")
	)
	# http://www.cdc.gov/nchs/data/nhsr/nhsr010.pdf

	# Summary values for the first group (`_fem`); presumably sample size, mean
	# height and standard error from the report linked above (TODO: confirm the
	# table and units).
	n_fem <- 604
	h_fem <- 162.2
	se_fem <- 0.34
	# Back out the standard deviation from the standard error: sd = se * sqrt(n).
	sd_fem <- se_fem * sqrt(n_fem)

	# The same summary values for the second group (`_mal`).
	n_mal <- 591
	h_mal <- 176.6
	se_mal <- 0.38
	#### Values created by statistics
	Statistical layers added to plots actually create new pieces of data, like the y-coordinates of the smoother. Some statistical layers create a few different values, and you can choose which one you want to plot. For example, here is a density plot, where the kernel density estimate is represented by a colored line.

	```{r tidy = FALSE, fig.width = 8/1.2, fig.height=5/1.2}
	# Density of Dur_msec, drawn as one colored line per Word.
	ggplot(I_jean, aes(Dur_msec, color = Word))+
	  geom_density()
	```

	You have to understand the densities represented in this plot as being conditional on selecting a specific word. That is, given that we have decided to think about the lexical item "I've", what is the probability it will be found in a specific range of durations?
	#' Find zero crossings in an fd object
	#'
	#' @import fda
	#' @import magrittr
	#'
	#' @param fd an fd object
	#' @param Lfdobj the derivative (0, 1, 2)
	#' @param slope The slope of interest at the zero crossing
	#' @param eps The prediction granularity
	#' @param min Localize the zero crossing search to be greater than min