# Skip to content
#
# Instantly share code, notes, and snippets.

# Recode the living-arrangement item (H5HR2) into a labelled factor.
# NOTE(review): code 4 is not among the levels, so any rows coded 4 become NA
# in H5HR2cat and drop out of the bar graph below -- confirm this is intended.
wave5addhealth$H5HR2cat <- factor(
  wave5addhealth$H5HR2,
  levels = c(1, 2, 3, 5, 6),
  labels = c(
    "Own Place", "Parents' Home", "Another Person's Home", "Homeless", "Other"
  )
)

# Frequency table and mode of the raw H5HR2 codes.
frequency(
  wave5addhealth$H5HR2,
  title = "Frequency Distribution of Living Arrangements, Wave 5 Add Health"
)
MODE(wave5addhealth$H5HR2)

# Bar graph of the share of non-missing respondents in each category.
# after_stat() replaces the deprecated ..count.. notation, and the chain ends
# without a trailing `+` so the plot is a complete statement on its own.
ggplot(
  data = subset(wave5addhealth, !is.na(H5HR2cat)),
  aes(x = H5HR2cat)
) +
  geom_bar(
    aes(y = after_stat(count / sum(count))),
    color = "blue", fill = "yellow"
  ) +
  scale_y_continuous(labels = scales::percent)
# Variables to keep from vac_data and to standardize.
std_vars <- c(
  "SP.DYN.IMRT.IN", "GC.TAX.TOTL.GD.ZS", "MS.MIL.TOTL.TF.ZS",
  "IQ.SCI.OVRL", "icrg_qog", "wbgi_gee", "polity2"
)

# Keep only the rows that are complete on every selected variable.
std_subdata <- na.omit(vac_data[, std_vars])

# Append a scale()-d copy of each variable under the name "std_<variable>".
for (var_name in std_vars) {
  std_subdata[[paste0("std_", var_name)]] <- scale(std_subdata[[var_name]])
}
## Place this section below the code that creates the labels for your
## categorical variable's values.
#### DESCRIPTIVE STATISTICS CODE ####
# Note: for a categorical variable, only the mode is appropriate (plus the
# median if the variable is ordinal). You can also read the mode off the
# frequency table: it is the value with the largest percent.
MODE(wave5addhealth$H5HR2)
# A bar graph is the appropriate chart for a categorical variable.
# Next, draw the bar graph.
#START BY LOADING LIBRARIES AND OPTIONS
library(aws.s3)
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
#START BY LOADING LIBRARIES AND OPTIONS
library(aws.s3)
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
#START BY LOADING LIBRARIES AND OPTIONS
library(aws.s3)
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
#START BY LOADING LIBRARIES AND OPTIONS
library(aws.s3)
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
options(scipen = 999)
#Load the libraries and data first
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library('lehmansociology')
data(gss123)
# Keep this near the top of your script so p-values are not printed in
# scientific notation.
options(scipen = 999)

# Run a one-way analysis of variance (ANOVA); the formula is DV ~ IV.
# Columns are written bare and looked up through `data = gss123`; putting
# gss123$physhlth inside the formula bypasses `data` and labels the term
# "gss123$race" in the output.
data.aov1 <- aov(physhlth ~ race, data = gss123)
summary(data.aov1)

# Mean of the dependent variable within each group, ignoring missing values.
by(gss123$physhlth, gss123$race, mean, na.rm = TRUE)

# Remove the hashtag on the line below to run Tukey ONLY if the F test is
# statistically significant.
# TukeyHSD(data.aov1)
# Example: natheal is the independent variable, physhlth the dependent one.
# Bar graph comparing the mean of physhlth across the natheal categories,
# leaving out rows where natheal is missing.
ggplot(
  data = gss123[!is.na(gss123$natheal), ],
  mapping = aes(x = natheal, y = physhlth)
) +
  stat_summary(fun = mean, geom = "bar") +
  labs(
    x = "Opinions on Health Spending",
    y = "Mean Number of Days of Poor Physical Health"
  )
# Does it look like there are
#differences in the dependent variable based on the categories of the