Josef Fruehwald JoFrhwld

## Rmd_example.rmd
#### Values created by statistics
Statistical layers added to plots actually create new pieces of data, like the y-coordinates of the smoother. Some statistical layers create a few different values, and you can choose which one you want to plot. For example, here is a density plot, where the kernel density estimate is represented by a colored line.

```{r tidy = FALSE, fig.width = 8/1.2, fig.height=5/1.2}
ggplot(I_jean, aes(Dur_msec, color = Word))+
  geom_density()
```

You have to understand the densities represented in this plot as being conditional on selecting a specific word. That is, given that we have decided to think about the lexical item "I've", what is the probability it will be found in a specific range of durations?

## cohen_d_height.R
# http://www.cdc.gov/nchs/data/nhsr/nhsr010.pdf

# Female group: sample size, height estimate and its standard error
# (figures presumably taken from the report linked above -- confirm).
n_fem <- 604
h_fem <- 162.2
se_fem <- 0.34
# Recover the standard deviation from the standard error: sd = se * sqrt(n)
sd_fem <- sqrt(n_fem) * se_fem

# Male group: sample size, height estimate and its standard error
n_mal <- 591
h_mal <- 176.6
se_mal <- 0.38

## equality.R
library(ggplot2)
library(grid)

# Background: fill colour and the single 180 x 180 rectangle it applies to
bg_col <- rgb(0.8, 0, 0)
bg <- data.frame(
  xmin = 0,
  xmax = 180,
  ymin = 0,
  ymax = 180
)

# Two bars sharing the same horizontal extent, stacked at different heights,
# and the lighter colour used for them
bars_col <- rgb(0.9, 0.56, 0.56)
bars <- data.frame(
  xmin = c(35, 35),
  xmax = c(145, 145),
  ymin = c(45, 100),
  ymax = c(80, 135),
  groups = c("first", "second")
)


## syllabify.R
syllabify <- function(trans){
    require(stringr)

    segments <- unlist(str_split(trans, " "))
	nuclei_string <- "A|E|I|O|U|@"
	nucs <- grep(nuclei_string, segments)
	n <- length(nucs)

	r_colored <- nucs[grep("R", segments[nucs])]

## multi_match.R
multi_match <- function(x, table){
    # returns initial indicies of all substrings in table which match x
    if(length(table) < length(x)){
		return(NA)
	}else{
		check_mat <- matrix(nrow = length(x), ncol = length(table))
		for(i in 1:length(x)){
			check_mat[i,] <- table %in% x[i]
		}
		out <- vector(length = length(table))

## ses_college.R
# One row per SES group with three numeric columns
# (values look like proportions per group -- confirm against the source).
data <- data.frame(
  SES = c("High", "Middle", "Low"),
  Applied = c(0.79, 0.59, 0.5),
  Admitted = c(0.67, 0.47, 0.42),
  Enrolled = c(0.53, 0.28, 0.32)
)

ggplot(data, aes(SES)) +
	geom_point(aes(y = Applied, color = "Applied"))+
	geom_line(aes(y = Applied, group = 1, color = "Applied"))+
	geom_point(aes(y = Admitted/Applied, color = "Applied and Admitted"))+
	geom_line(aes(group = 1, y = Admitted/Applied, color = "Applied and Admitted"))+

## theKingOfFrance.py
# The name is bound to None: there is no King of France to look up.
theKingOfFrance = None
# The list that membership is tested against.
bald = ["JeanLucPicard", "StoneColdSteveAustin"]

print("Jean: The King of France is bald.")
# None is not an element of `bald`, so the `in` test is False and the
# `not in` test is True: the elif branch runs and prints "It's false!".
if theKingOfFrance in bald:
    print("Jaques: It's true!")
elif theKingOfFrance not in bald:
    print("Jaques: It's false!")

# Blank line separating this exchange from whatever output follows.
print("")

## sql_load.R
#' SQL Load
#'
#' This is a function meant to be used along with ldply to read data in using sqldf.
#'
#' @param x the path to a file to be read
#' @param selection the columns to return. Defaults to \code{"*"}
#' @param condition conditions defining which data rows to load in SQL
#' @param file.format an argument to be passed to \code{sqldf}.
#' Defaults to assume a tab-delimited file with a header row.
#' See \code{?sqldf} for more info

## dplyr_to_ggplot2.r
# Attach plyr before dplyr so that dplyr's verbs mask plyr's same-named
# functions rather than the other way round. plyr also ships the `baseball`
# data set used below.
library(plyr)
library(dplyr)
library(ggplot2)


# Sum `r` within each year, then pipe the summarised data frame straight
# into ggplot and draw one point per year.
baseball %>%
  group_by(year) %>%
  summarise(r = sum(r)) %>%
  ggplot(., aes(year, r)) +
    geom_point()

## bootMer_ex.R
library(lme4)

# Fit a linear mixed-effects model of F1_n.
#   Fixed effects:  plt_vclass, Decade_c, freq_c and all of their two- and
#                   three-way interactions (the `*` expansion).
#   Random effects: by-File intercepts plus slopes for plt_vclass and freq_c;
#                   by-word intercepts plus a slope for Decade_c.
# `ays_to_test` is not defined in this snippet and must already exist.
mod <-  lmer(F1_n ~ plt_vclass * Decade_c * freq_c + (plt_vclass + freq_c| File) + (Decade_c|word),
             data = ays_to_test)

boot_fun <- function(mod){
  # x is a named vector
  x <- fixef(mod)

  #out is a longer named vector
	#### Values created by statistics
	Statistical layers added to plots actually create new pieces of data, like the y-coordinates of the smoother. Some statistical layers create a few different values, and you can choose which one you want to plot. For example, here is a density plot, where the kernel density estimate is represented by a colored line.

	```{r tidy = F, fig.width = 8/1.2, fig.height=5/1.2}
	ggplot(I_jean, aes(Dur_msec, color = Word))+
	geom_density()
	```

	You have to understand the densities represented in this plot as being conditional on selecting a specific word. That is, given that we have decided to think about the lexical item "I've", what is the probability it will be found in a specific range of durations?
	# http://www.cdc.gov/nchs/data/nhsr/nhsr010.pdf

	n_fem = 604
	h_fem = 162.2
	se_fem = 0.34
	sd_fem = se_fem * sqrt(n_fem)

	n_mal = 591
	h_mal = 176.6
	se_mal = 0.38
	library(ggplot2)
	library(grid)

	bg <- data.frame(xmin = 0, xmax = 180, ymin = 0, ymax = 180)
	bg_col <- rgb(0.8,0,0)

	bars <- data.frame(xmin = c(35, 35), xmax = c(145, 145), ymin = c(45, 100), ymax = c(80, 135), groups = c("first", "second") )
	bars_col <- rgb(0.9, 0.56, 0.56)
	syllabify <- function(trans){
	require(stringr)

	segments <- unlist(str_split(trans, " "))
	nuclei_string <- "A\|E\|I\|O\|U\|@"
	nucs <- grep(nuclei_string, segments)
	n <- length(nucs)

	r_colored <- nucs[grep("R", segments[nucs])]
	multi_match <- function(x, table){
	# returns initial indicies of all substrings in table which match x
	if(length(table) < length(x)){
	return(NA)
	}else{
	check_mat <- matrix(nrow = length(x), ncol = length(table))
	for(i in 1:length(x)){
	check_mat[i,] <- table %in% x[i]
	}
	out <- vector(length = length(table))
	data.frame(SES = c("High","Middle","Low"),
	Applied = c(0.79, 0.59, 0.5),
	Admitted = c(0.67, 0.47, 0.42),
	Enrolled = c(0.53, 0.28, 0.32)) -> data

	ggplot(data, aes(SES)) +
	geom_point(aes(y = Applied, color = "Applied"))+
	geom_line(aes(y = Applied, group = 1, color = "Applied"))+
	geom_point(aes(y = Admitted/Applied, color = "Applied and Admitted"))+
	geom_line(aes(group = 1, y = Admitted/Applied, color = "Applied and Admitted"))+
	theKingOfFrance = None
	bald = ["JeanLucPicard", "StoneColdSteveAustin"]

	print("Jean: The King of France is bald.")
	if theKingOfFrance in bald:
	print("Jaques: It's true!")
	elif theKingOfFrance not in bald:
	print("Jaques: It's false!")

	print("")
	#' SQL Load
	#'
	#' This is a function meant to be used along with ldply to read data in using sqldf.
	#'
	#' @param x the path to a file to be read
	#' @param selection the columns to return. Defaults to \code{"*"}
	#' @param condition conditions defining which data rows to load in SQL
	#' @param file.format an argument to be passed to \code{sqldf}.
	#' Defaults to assume a tab-delimited file with a header row.
	#' See \code{?sqldf} for more info
	libarary(plyr)
	library(dplyr)
	library(ggplot2)


	baseball %>%
	group_by(year)%>%
	summarise(r=sum(r)) %>%
	ggplot(., aes(year, r)) +
	geom_point()
	library(lme4)

	mod <- lmer(F1_n ~ plt_vclass * Decade_c * freq_c + (plt_vclass + freq_c\| File) + (Decade_c\|word),
	data = ays_to_test)

	boot_fun <- function(mod){
	# x is a named vector
	x <- fixef(mod)

	#out is a longer named vector