Last active
May 12, 2025 20:32
-
-
Save susandumais/d2172f27166265d1b480263ff672e86b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Always load your libraries and data first.
# The attach order below is intentional: a package attached later masks
# same-named functions from packages attached earlier, so do not reorder.
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library(lehmansociology)

# Load the gss123 data set used by every example in this script
# (presumably shipped with lehmansociology -- confirm).
data(gss123)

# A large scipen penalty makes R print numbers in fixed rather than
# scientific notation.
options(scipen = 999)
# My example variables are fear (nominal dichotomous), wrkstat (nominal
# non-dichotomous), health (ordinal), and speduc (interval-ratio).
# Replace the variable names I use with your own variables.
# Remember to change the titles, labels, etc. too.
# You may also need to change the binwidth for the histogram.
# FREQUENCY TABLES
# One frequency table per level of measurement.
# NOTE(review): frequency() is presumably the lehmansociology table helper
# (masking stats::frequency) -- confirm against the package documentation.
# Titles use an explicit "\n" for the intended two-line heading.

# NOMINAL DICHOTOMOUS
frequency(
  gss123$fear,
  title = "Frequency Distribution of\nFear of Walking Alone at Night, GSS"
)

# NOMINAL NON-DICHOTOMOUS
frequency(
  gss123$wrkstat,
  title = "Frequency Distribution of\nEmployment Status, GSS"
)

# ORDINAL
# Cumulative percentages are requested because the categories are ordered.
frequency(
  gss123$health,
  cumulative.percent = TRUE,
  title = "Frequency Distribution of\nSelf-Reported Health, GSS"
)

# INTERVAL RATIO
frequency(
  gss123$speduc,
  cumulative.percent = TRUE,
  title = "Distribution of Spouses'\nHighest Year of Education Attained, GSS"
)
# GRAPHS
# NOMINAL DICHOTOMOUS
# Percentage bar graph of fear. Rows with a missing fear value are dropped
# first, so each bar is a share of the valid responses only.
ggplot(data = subset(gss123, !is.na(fear)), aes(x = fear)) +
  geom_bar(
    # after_stat(count) replaces the `..count..` notation, which has been
    # deprecated since ggplot2 3.4.0; the result is each bar's proportion.
    aes(y = after_stat(count / sum(count))),
    color = "red",
    fill = "white"
  ) +
  # Show the proportions on the y axis as percentages.
  scale_y_continuous(labels = scales::percent) +
  ggtitle(
    "Bar Graph of Whether Respondent is Afraid\nto Walk Alone at Night, GSS"
  ) +
  labs(y = "Percent", x = "Afraid to Walk Alone") +
  # Slant the category labels.
  theme(axis.text.x = element_text(angle = -45))
# NOMINAL NON-DICHOTOMOUS
# Percentage bar graph of wrkstat. Rows with a missing wrkstat value are
# dropped first, so each bar is a share of the valid responses only.
ggplot(data = subset(gss123, !is.na(wrkstat)), aes(x = wrkstat)) +
  geom_bar(
    # after_stat(count) replaces the `..count..` notation, which has been
    # deprecated since ggplot2 3.4.0; the result is each bar's proportion.
    aes(y = after_stat(count / sum(count))),
    color = "red",
    fill = "white"
  ) +
  # Show the proportions on the y axis as percentages.
  scale_y_continuous(labels = scales::percent) +
  ggtitle("Bar Graph of Respondents' Employment Status, GSS") +
  labs(y = "Percent", x = "Employment Status") +
  # Slant the category labels.
  theme(axis.text.x = element_text(angle = -45))
# ORDINAL
# Percentage bar graph of health. Rows with a missing health value are
# dropped first, so each bar is a share of the valid responses only.
ggplot(data = subset(gss123, !is.na(health)), aes(x = health)) +
  geom_bar(
    # after_stat(count) replaces the `..count..` notation, which has been
    # deprecated since ggplot2 3.4.0; the result is each bar's proportion.
    aes(y = after_stat(count / sum(count))),
    color = "red",
    fill = "white"
  ) +
  # Show the proportions on the y axis as percentages.
  scale_y_continuous(labels = scales::percent) +
  ggtitle("Bar Graph of the Distribution of\nSelf-Reported Health, GSS") +
  labs(y = "Percent", x = "Health Condition") +
  # Slant the category labels.
  theme(axis.text.x = element_text(angle = -45))
# INTERVAL RATIO
# Percentage histogram of speduc with one bin per year (binwidth = 1).
# Missing values are filtered out up front, as in the bar graphs, so the
# plot is built from valid responses without a "removed rows" warning.
ggplot(data = subset(gss123, !is.na(speduc)), aes(x = speduc)) +
  geom_histogram(
    # after_stat(count) replaces the `..count..` notation, which has been
    # deprecated since ggplot2 3.4.0. Multiplying by 100 puts the y axis on
    # a 0-100 percent scale to match the "Percent" axis label.
    aes(y = after_stat(count / sum(count) * 100)),
    color = "blue",
    fill = "pink",
    binwidth = 1
  ) +
  ggtitle(
    "Distribution of Spouse's Highest Year\nof Education Attained, GSS"
  ) +
  labs(y = "Percent", x = "Highest Year of Education")
# CENTRAL TENDENCY
# NOTE(review): MODE() and frequency() are presumably provided by
# lehmansociology -- confirm against the package documentation.

# NOMINAL DICHOTOMOUS AND NON-DICHOTOMOUS
# Only the mode is reported for nominal variables.
MODE(gss123$fear)
MODE(gss123$wrkstat)

# ORDINAL
frequency(
  gss123$health,
  cumulative.percent = TRUE,
  title = "GSS Respondents'\nHealth Condition"
)
# Tabulate the numeric codes as well, so the numeric median computed below
# can be matched back to a health category.
# NOTE(review): assumes as.numeric() yields the category codes of health
# (e.g. health is a factor) -- confirm.
frequency(as.numeric(gss123$health))
MODE(gss123$health)
median(as.numeric(gss123$health), na.rm = TRUE)

# INTERVAL RATIO
summary(gss123$speduc)
MODE(gss123$speduc)
median(gss123$speduc, na.rm = TRUE)
mean(gss123$speduc, na.rm = TRUE)
# MEASURES OF VARIABILITY - INTERVAL RATIO
# summary() reports the minimum and maximum (and counts NAs on its own, so it
# needs no na.rm argument).
summary(gss123$speduc)
# Remember that the range = maximum - minimum; range() returns c(min, max)
# and diff() subtracts the first from the second.
diff(range(gss123$speduc, na.rm = TRUE))
# Interquartile range, variance, and standard deviation, ignoring missing
# values.
IQR(gss123$speduc, na.rm = TRUE)
var(gss123$speduc, na.rm = TRUE)
sd(gss123$speduc, na.rm = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment