# Source: GitHub gist naomispence/84ffbd0850eb85ae7082ba2bd9175568
# (last active April 11, 2024 03:05).
# Note: GitHub flagged this file as containing bidirectional Unicode text that
# may be interpreted or compiled differently than it appears. To review, open
# the file in an editor that reveals hidden Unicode characters.
#Lab Report 5: Making and Comparing Confidence Intervals | |
# Packages for this lab. The load order is kept as-is because later packages
# can mask functions from earlier ones.
library(ggplot2)  # histograms and the bar graph of means
library(dplyr)    # filter() for building the two mini datasets
library(lsr)      # ciMean() for confidence intervals
library(descr)
library(Hmisc)
library(lehmansociology)

# Load the survey data used throughout the script
# (presumably shipped with lehmansociology -- confirm).
data(gss123)

# Strongly prefer fixed notation over scientific notation in printed output.
options(scipen = 999)
#For this LR we will be looking at the | |
#relationship between a dichotomous nominal independent variable | |
#and an interval ratio dependent variable | |
#First, we have to look at our dichotomous variable and see | |
#what the two categories are. | |
#WHICH DICHOTOMOUS VARIABLE ARE YOU USING? | |
#REPLACE SEX WITH THE NAME OF YOUR DICHOTOMOUS VARIABLE | |
# Tabulate the variable as stored, then again after as.numeric() so the
# numeric code behind each category is visible (the category whose code
# equals 1 is the one flagged TRUE below).
frequency(gss123$sex)
frequency(as.numeric(gss123$sex))

# The code below shows you how to formally dichotomize a variable so that
# the two groups are coded as 0 and 1 (stored as FALSE/TRUE, which R treats
# as 0/1 in arithmetic).
# This is important for doing statistical
# analyses. Note that we name the new dichotomous variable whatever the
# category is that we have coded as 1.
gss123$male <- as.numeric(gss123$sex) == 1
frequency(gss123$male)

# Proportion of cases coded TRUE. na.rm = TRUE keeps a single missing value
# in the source variable from turning the whole proportion into NA.
mean(gss123$male, na.rm = TRUE)
#Look at the two categories in your frequency table
#and decide what you will call each of your mini
#datasets. Choose names that make sense.
#Below, REPLACE men and women WITH THE NAMES OF YOUR TWO | |
#MINI DATASETS. REPLACE sex WITH THE NAME OF YOUR | |
#DICHOTOMOUS VARIABLE. REPLACE "Male" and "Female" | |
#WITH THE TWO CATEGORIES OF YOUR FREQUENCY TABLE. | |
#REMEMBER TO INCLUDE THEM IN QUOTATION MARKS AND | |
#TO WRITE THEM EXACTLY AS THEY APPEAR (FOR | |
#EXAMPLE, IF THEY ARE CAPITALIZED IN THE FREQUENCY | |
#TABLE THEY NEED TO BE CAPITALIZED IN THE CODE). | |
# Split the data into one mini dataset per value of the logical column `male`.
# filter() keeps rows where the condition is TRUE and drops rows where it is
# NA, so cases with a missing value end up in neither mini dataset.
# Filtering on the logical itself avoids comparing it with the strings
# "TRUE"/"FALSE", which only worked through implicit type coercion.
# NOTE(review): the instructions above mention sex / "Male" / "Female", but
# this code filters on the logical `male` column -- confirm which form
# students are expected to edit.
men <- dplyr::filter(gss123, male)
women <- dplyr::filter(gss123, !male)
#Follow the code below for the rest of your | |
#LR but replace men and | |
#women with the names of your two mini datasets | |
#and replace conrinc with the name of your | |
#interval ratio variable. | |
#Make sure to change titles and labels, too! | |
#Fill in your answers below | |
#WHICH INTERVAL-RATIO VARIABLE ARE YOU USING? | |
#WHAT IS YOUR RESEARCH QUESTION? | |
#Summary statistics and a histogram for your dependent variable | |
#for your first mini dataset | |
#CHANGE THIS CODE BY REPLACING men WITH THE NAME | |
#OF ONE OF YOUR TWO MINI DATASETS AND REPLACING | |
#conrinc WITH YOUR DEPENDENT VARIABLE | |
#AND BY EDITING THE TITLES AND LABELS FOR GRAPHS | |
#REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT | |
#TO CHANGE IT TO SOMETHING BIGGER | |
# Summary statistics for the first mini dataset. For a numeric vector,
# summary() reports the number of NAs itself, so it needs no na.rm argument;
# sd() does need it.
summary(men$conrinc)
sd(men$conrinc, na.rm = TRUE)

# Histogram with the y axis rescaled from counts to percent of plotted cases.
# after_stat() is the current ggplot2 spelling of the deprecated ..count..
# notation; the computed values are the same.
ggplot(data = men, aes(x = conrinc)) +
  geom_histogram(
    aes(y = after_stat(count / sum(count) * 100)),
    color = "blue", fill = "pink", binwidth = 1
  ) +
  ggtitle("Distribution of Males' Income, GSS") +
  labs(y = "Percent", x = "Income")
#INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM | |
#Now we are getting summary statistics and a | |
#histogram for your second mini dataset. | |
#CHANGE THIS CODE BY REPLACING women WITH THE NAME | |
#OF YOUR OTHER MINI DATASET AND REPLACING | |
#conrinc WITH YOUR DEPENDENT VARIABLE | |
#AND BY EDITING THE TITLES AND LABELS FOR GRAPHS | |
#REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT | |
#TO CHANGE IT TO SOMETHING BIGGER | |
# Summary statistics for the second mini dataset. For a numeric vector,
# summary() reports the number of NAs itself, so it needs no na.rm argument;
# sd() does need it.
summary(women$conrinc)
sd(women$conrinc, na.rm = TRUE)

# Histogram with the y axis rescaled from counts to percent of plotted cases.
# after_stat() is the current ggplot2 spelling of the deprecated ..count..
# notation; the computed values are the same.
ggplot(data = women, aes(x = conrinc)) +
  geom_histogram(
    aes(y = after_stat(count / sum(count) * 100)),
    color = "blue", fill = "pink", binwidth = 1
  ) +
  ggtitle("Distribution of Females' Income, GSS") +
  labs(y = "Percent", x = "Income")
#INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM | |
#Comparing means in a bar graph | |
#CHANGE THE INDEPENDENT VARIABLE (replace sex) | |
#CHANGE THE DEPENDENT VARIABLE (replace conrinc) | |
#CHANGE THE LABELS | |
# Bar graph with one bar per category of the independent variable; the bar
# height is the mean of the dependent variable within that category.
# The y-axis label previously read "Mean Income by Spouse", a leftover from
# another variable; the plot shows mean income for each sex.
ggplot(data = gss123) +
  stat_summary(aes(x = sex, y = conrinc), fun = mean, geom = "bar") +
  xlab("Sex") +
  ylab("Mean Income") +
  # Tilt the category labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = -45))
#ANSWER THIS: Compare the results you got for your two groups. | |
#Which has a bigger mean? Which has more variability? | |
#Based on the bar graph of means, | |
#does it seem like the two categories of your | |
#dichotomous variable | |
#differ in the dependent variable? | |
#USING INFERENTIAL STATISTICS TO COMPARE GROUPS
#Let's compare the confidence intervals for the mean of our | |
#dependent variable for the two categories of our independent variable. | |
#REPLACE men AND women WITH THE NAMES OF YOUR MINI DATASETS | |
#REPLACE conrinc WITH THE NAME OF YOUR DEPENDENT VARIABLE | |
# 95% confidence interval for the mean of the dependent variable,
# one call per mini dataset (missing values are dropped first).
ciMean(men$conrinc, conf = 0.95, na.rm = TRUE)
ciMean(women$conrinc, conf = 0.95, na.rm = TRUE)

# Same comparison at the 99% confidence level.
ciMean(men$conrinc, conf = 0.99, na.rm = TRUE)
ciMean(women$conrinc, conf = 0.99, na.rm = TRUE)
#INTERPRET the confidence intervals and come to a conclusion about whether | |
#your independent and dependent variables are related. |
# (GitHub page footer -- sign-up / sign-in prompts -- not part of the script.)