naomispence/confidence intervals, part 1

## confidence intervals, part 1
# For this lab, you need to load two additional libraries (lsr and dplyr). I recommend you put the library() lines below in Chunk 1 with your other library code.

library(lsr)
library(dplyr)

# Mean of tvhours, then confidence intervals around that mean at three
# different confidence levels. Compare how the interval changes as the
# confidence level goes up. Missing values are removed in every call.
mean(GSS$tvhours, na.rm = TRUE)
ciMean(x = GSS$tvhours, conf = 0.90, na.rm = TRUE) # 90% CI
ciMean(x = GSS$tvhours, conf = 0.95, na.rm = TRUE) # 95% CI
ciMean(x = GSS$tvhours, conf = 0.99, na.rm = TRUE) # 99% CI

# Dichotomize childs into a TRUE/FALSE variable (TRUE when childs is 0 or
# less), then get a confidence interval around the proportion that is TRUE.
# NOTE(review): as.numeric() on a factor returns level codes rather than the
# labels -- confirm that childs is stored as a number in GSS.
GSS$nochild <- (as.numeric(GSS$childs) <= 0)
# frequency() is presumably the frequency-table helper from one of the
# libraries already loaded in Chunk 1 (it is not loaded in this file).
frequency(GSS$nochild)
# The mean of a TRUE/FALSE variable is the proportion of TRUE values.
mean(GSS$nochild, na.rm = TRUE)
# 90% CI for the proportion of childless adults
# (as.numeric() turns TRUE/FALSE into 1/0 before it is passed to ciMean)
ciMean(x = as.numeric(GSS$nochild), conf = 0.90, na.rm = TRUE)
# 95% CI for the proportion of childless adults
ciMean(x = as.numeric(GSS$nochild), conf = 0.95, na.rm = TRUE)


# Mean of tvhours within each category of race. aggregate() hands the extra
# na.rm = TRUE argument on to the summary function named in FUN.
aggregate(GSS$tvhours, by = list(GSS$race), FUN = mean, na.rm = TRUE)
# Same grouping, but with a confidence interval for each group's mean.
# No conf value is given, so ciMean uses its default: a 95% interval.
aggregate(GSS$tvhours, by = list(GSS$race), FUN = ciMean, na.rm = TRUE)

# Dichotomize age: youngadult is TRUE for respondents aged 25 or younger,
# FALSE for everyone older, and NA where age is missing.
GSS$youngadult <- (GSS$age <= 25)
# Frequency table of the new variable to check the recode.
frequency(GSS$youngadult)

# Two-way table (crosstab) with nochild in the rows and youngadult in the
# columns.
# NOTE(review): crosstab() is presumably provided by one of the libraries
# already loaded in Chunk 1; it is not loaded in this file.
crosstab(GSS, row.vars = "nochild", col.vars = "youngadult")
# Compare the result from the line above with the result from the call below,
# which adds the format and row.margin.format options.
crosstab(
  GSS,
  row.vars = "nochild",
  col.vars = "youngadult",
  format = "column_percent",
  row.margin.format = "percent"
)

# To get the CI for the proportion of childless adults within an age group,
# first select that age group with filter() from the dplyr package.
# filter() KEEPS the rows where the condition is TRUE and drops the rows where
# it is FALSE or missing (NA). The line below therefore keeps only the young
# adults (25 and younger) and stores them in a new data frame named young.
# youngadult is already a TRUE/FALSE variable, so it is used as the condition
# directly; comparing it to the text "TRUE" is not needed.
young <- dplyr::filter(GSS, youngadult)
# If you filter on a text or factor variable instead, compare it to the value
# in quotation marks (variable == "value"); the quoted value is case sensitive.

# Notice that the line below uses the data frame called young instead of GSS.
ciMean(x = as.numeric(young$nochild), conf = 0.95, na.rm = TRUE)

# The two lines below do the same for "older" adults (26+ years old): the !
# means "not", so only rows where youngadult is FALSE are kept. Compare the
# young and older CIs for the proportion of childless adults.
older <- dplyr::filter(GSS, !youngadult)
ciMean(x = as.numeric(older$nochild), conf = 0.95, na.rm = TRUE)
	# For this lab, you need to load two additional libraries (lsr and dplyr). I recommend you put the library() lines below in Chunk 1 with your other library code.

	library(lsr)
	library(dplyr)

	# Mean of tvhours, then confidence intervals around that mean at three
	# different confidence levels. Compare how the interval changes as the
	# confidence level goes up. Missing values are removed in every call.
	mean(GSS$tvhours, na.rm = TRUE)
	ciMean(x = GSS$tvhours, conf = 0.90, na.rm = TRUE) # 90% CI
	ciMean(x = GSS$tvhours, conf = 0.95, na.rm = TRUE) # 95% CI
	ciMean(x = GSS$tvhours, conf = 0.99, na.rm = TRUE) # 99% CI

	# Dichotomize childs into a TRUE/FALSE variable (TRUE when childs is 0 or
	# less), then get a confidence interval around the proportion that is TRUE.
	# NOTE(review): as.numeric() on a factor returns level codes rather than the
	# labels -- confirm that childs is stored as a number in GSS.
	GSS$nochild <- (as.numeric(GSS$childs) <= 0)
	# frequency() is presumably the frequency-table helper from one of the
	# libraries already loaded in Chunk 1 (it is not loaded in this file).
	frequency(GSS$nochild)
	# The mean of a TRUE/FALSE variable is the proportion of TRUE values.
	mean(GSS$nochild, na.rm = TRUE)
	# 90% CI for the proportion of childless adults
	# (as.numeric() turns TRUE/FALSE into 1/0 before it is passed to ciMean)
	ciMean(x = as.numeric(GSS$nochild), conf = 0.90, na.rm = TRUE)
	# 95% CI for the proportion of childless adults
	ciMean(x = as.numeric(GSS$nochild), conf = 0.95, na.rm = TRUE)


	# Mean of tvhours within each category of race. aggregate() hands the extra
	# na.rm = TRUE argument on to the summary function named in FUN.
	aggregate(GSS$tvhours, by = list(GSS$race), FUN = mean, na.rm = TRUE)
	# Same grouping, but with a confidence interval for each group's mean.
	# No conf value is given, so ciMean uses its default: a 95% interval.
	aggregate(GSS$tvhours, by = list(GSS$race), FUN = ciMean, na.rm = TRUE)

	# Dichotomize age: youngadult is TRUE for respondents aged 25 or younger,
	# FALSE for everyone older, and NA where age is missing.
	GSS$youngadult <- (GSS$age <= 25)
	# Frequency table of the new variable to check the recode.
	frequency(GSS$youngadult)

	# Two-way table (crosstab) with nochild in the rows and youngadult in the
	# columns.
	# NOTE(review): crosstab() is presumably provided by one of the libraries
	# already loaded in Chunk 1; it is not loaded in this file.
	crosstab(GSS, row.vars = "nochild", col.vars = "youngadult")
	# Compare the result from the line above with the result from the call below,
	# which adds the format and row.margin.format options.
	crosstab(
	  GSS,
	  row.vars = "nochild",
	  col.vars = "youngadult",
	  format = "column_percent",
	  row.margin.format = "percent"
	)

	# To get the CI for the proportion of childless adults within an age group,
	# first select that age group with filter() from the dplyr package.
	# filter() KEEPS the rows where the condition is TRUE and drops the rows where
	# it is FALSE or missing (NA). The line below therefore keeps only the young
	# adults (25 and younger) and stores them in a new data frame named young.
	# youngadult is already a TRUE/FALSE variable, so it is used as the condition
	# directly; comparing it to the text "TRUE" is not needed.
	young <- dplyr::filter(GSS, youngadult)
	# If you filter on a text or factor variable instead, compare it to the value
	# in quotation marks (variable == "value"); the quoted value is case sensitive.

	# Notice that the line below uses the data frame called young instead of GSS.
	ciMean(x = as.numeric(young$nochild), conf = 0.95, na.rm = TRUE)

	# The two lines below do the same for "older" adults (26+ years old): the !
	# means "not", so only rows where youngadult is FALSE are kept. Compare the
	# young and older CIs for the proportion of childless adults.
	older <- dplyr::filter(GSS, !youngadult)
	ciMean(x = as.numeric(older$nochild), conf = 0.95, na.rm = TRUE)