# Source: GitHub gist naomispence/cebaf339b8b431fd411058f8c3f83d06
# (last active April 11, 2024 03:05).
# Note: GitHub flagged this file as containing bidirectional Unicode text that
# may be interpreted or compiled differently than it appears. To review, open
# the file in an editor that reveals hidden Unicode characters.
## Comparing CIs and distributions

# Load the libraries and data first.
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library("lehmansociology")

# Load the gss123 dataset used throughout this lab.
data(gss123)

# Heavily penalize scientific notation so that large numbers (such as incomes)
# print in fixed notation.
options(scipen = 999)
# In this lab we will learn how to explore the relationship between a
# dichotomous independent variable and an interval-ratio dependent variable.
# We do this by getting detailed information on each of the two categories
# of the dichotomous variable. The example we will work through looks at the
# relationship between sex and income in constant dollars.
# WHAT WOULD THE RESEARCH QUESTION BE?

# To make a variable dichotomous, we first need to see which category is
# coded as 1. Compare the labelled frequencies with the frequencies of the
# underlying numeric codes.
frequency(gss123$sex)
frequency(as.numeric(gss123$sex))

# The code below shows you how to formally dichotomize a variable so that
# the two groups are coded as 0 and 1. This is important for doing
# statistical analyses. Note that we name the new dichotomous variable
# whatever the category is that we have coded as 1.
# The comparison produces TRUE/FALSE values, which R treats as 1/0 in
# arithmetic, so the mean below is the proportion of cases in the category
# coded 1.
gss123$male <- as.numeric(gss123$sex) == 1
frequency(gss123$male)
# na.rm = TRUE keeps the proportion defined even if some cases are missing sex.
mean(gss123$male, na.rm = TRUE)
# Next, we want to create two separate mini datasets, one for one of
# our dichotomous categories, and one for the other.
# The line of code below "filters out" or pulls out the males and creates a
# temporary mini dataset with just the males in it, named male.
# gss123$male is a logical (TRUE/FALSE) column, so it can be used directly as
# the filter condition; rows where it is FALSE or missing are dropped.
# (When filtering on a category label instead, e.g. variable == "Label", the
# value in quotation marks is case sensitive.)
male <- dplyr::filter(gss123, male)

# Notice that the lines below use the temporary dataset called male instead of
# gss123, and that male is case sensitive.
# We are getting summary statistics and a histogram for income for MALES only.
summary(male$conrinc, na.rm = TRUE)
sd(male$conrinc, na.rm = TRUE)

# Histogram of income in $10,000-wide bins. after_stat() converts the computed
# bin counts into percentages of all plotted cases (it replaces the deprecated
# ..count.. notation).
ggplot(data = male, aes(x = conrinc)) +
  geom_histogram(
    aes(y = after_stat(count / sum(count) * 100)),
    color = "blue",
    fill = "pink",
    binwidth = 10000
  ) +
  ggtitle("Distribution of American Males by Income") +
  labs(y = "Percent", x = "Income")
# Now we are filtering out females into a mini dataset and getting
# summary statistics and a histogram for income for FEMALES only.
# gss123$male is a logical (TRUE/FALSE) column, so !male selects the rows
# where it is FALSE; rows where it is missing are dropped.
female <- dplyr::filter(gss123, !male)
summary(female$conrinc, na.rm = TRUE)
sd(female$conrinc, na.rm = TRUE)

# Histogram of income in $10,000-wide bins. after_stat() converts the computed
# bin counts into percentages of all plotted cases (it replaces the deprecated
# ..count.. notation).
ggplot(data = female, aes(x = conrinc)) +
  geom_histogram(
    aes(y = after_stat(count / sum(count) * 100)),
    color = "blue",
    fill = "pink",
    binwidth = 10000
  ) +
  ggtitle("Distribution of American Females by Income") +
  labs(y = "Percent", x = "Income")
# Comparing means in a bar graph: one bar per category of sex, with the bar
# height set to the mean of conrinc for that category.
ggplot(data = gss123) +
  stat_summary(aes(x = sex, y = conrinc), fun = mean, geom = "bar") +
  xlab("Sex") +
  ylab("Mean Income in Constant Dollars") +
  # Tilt the category labels on the x axis.
  theme(axis.text.x = element_text(angle = -45))
# Compare the results you got for males and for females. Which has a bigger
# mean? Which has more variability? Does it seem like males and females
# differ in their income?

# Note that we just compared descriptive statistics. What if we want to
# compare inferential statistics?
# Let's compare the confidence intervals for the mean income for males
# and females.

# Comparing the CIs - 95%
ciMean(male$conrinc, na.rm = TRUE, conf = 0.95)
ciMean(female$conrinc, na.rm = TRUE, conf = 0.95)

# Comparing the CIs - 99%
ciMean(male$conrinc, na.rm = TRUE, conf = 0.99)
ciMean(female$conrinc, na.rm = TRUE, conf = 0.99)

# Interpret the confidence intervals and come to a conclusion about whether
# sex and income are related.
# (GitHub page footer, not part of the script: "Sign up for free to join this
# conversation on GitHub. Already have an account? Sign in to comment.")