Skip to content

Instantly share code, notes, and snippets.

@wmay
Created March 30, 2019 16:51
Show Gist options
  • Save wmay/8c36ec20d3f98eebbce4eeebd1664416 to your computer and use it in GitHub Desktop.
Save wmay/8c36ec20d3f98eebbce4eeebd1664416 to your computer and use it in GitHub Desktop.
Quick analysis of recent sex trends (2008-2018) with GSS data
## looking at sex trends
library(foreign)
## oh wow this is a large file
gss = read.spss('/home/will/research/gss/GSS7218_R1.sav', to.data.frame = T)
## do a bit of organizing
names(gss) = tolower(names(gss))
gss$wtssall = as.numeric(as.character(gss$wtssall))
gss$age = as.numeric(as.character(gss$age))
## a bit of exploratory data analysis
year_counts = table(gss$year)
plot(as.integer(names(year_counts)), as.numeric(year_counts))
## let's subset this for later
gss = subset(gss, year >= 1988)
useful_vars = c('year', 'wtssall', 'sex', 'age', 'race',
'marital', 'sexfreq', 'partners')
gss = gss[, useful_vars]
save(gss, file = 'gss_sex.Rdata')
## hypothesis tests
library(reshape2)
library(ggplot2)
load('gss_sex.Rdata')
## organize the data a bit
gss$no_sex = gss$sexfreq == 'NOT AT ALL'
gss$no_partners = gss$partners == 'NO PARTNERS'
## also subset by age for these analyses
gss = subset(gss, age <= 30)
id_vars = c('year', 'wtssall', 'sex', 'age', 'race',
'marital')
long_gss = melt(gss, id.vars = id_vars, measure.vars = c('no_sex', 'no_partners'))
## 1) trend (linear regression with percents)
## check some graphs
ysex = aggregate(wtssall ~ variable + year + value, FUN = sum, data = long_gss)
ysex_df = dcast(ysex, variable + year ~ value, value.var = 'wtssall')
ysex_df$no_sex_p = ysex_df$`TRUE` / (ysex_df$`FALSE` + ysex_df$`TRUE`)
ggplot(ysex_df, aes(year, no_sex_p)) +
geom_line() +
facet_wrap(~ variable, nrow = 2)
## split up by sex
ysex2 = aggregate(wtssall ~ variable + year + sex + value, FUN = sum, data = long_gss)
ysex2_df = dcast(ysex2, variable + year + sex ~ value, value.var = 'wtssall')
ysex2_df$no_sex_p = ysex2_df$`TRUE` / (ysex2_df$`FALSE` + ysex2_df$`TRUE`)
ggplot(ysex2_df, aes(year, no_sex_p, color = sex)) +
geom_line() +
facet_wrap(~ variable, nrow = 2)
## linear probability model
lp1 = lm(no_sex ~ year, data = gss, weights = gss$wtssall)
lp2 = lm(no_sex ~ year * sex, data = gss, weights = gss$wtssall)
lp3 = lm(no_partners ~ year, data = gss, subset = year >= 2008, weights = gss$wtssall)
lp4 = lm(no_partners ~ year * sex, data = gss, subset = year >= 2008, weights = gss$wtssall)
## same but with logistic regression
lr1 = glm(no_sex ~ year, data = gss, weights = gss$wtssall, family = 'binomial')
lr2 = glm(no_sex ~ year * sex, data = gss, weights = gss$wtssall, family = 'binomial')
lr3 = glm(no_partners ~ year, data = gss, subset = year >= 2008,
weights = gss$wtssall, family = 'binomial')
lr4 = glm(no_partners ~ year * sex, data = gss, subset = year >= 2008,
weights = gss$wtssall, family = 'binomial')
## 2) 2018 significantly higher than 2008 (2-sample t-test)
## actually nevermind this is more of a prop.test kind of thing
## no_sex
no_sex_tab = subset(ysex_df, year %in% c(2008, 2018) &
variable == 'no_partners',
select = c(`TRUE`, `FALSE`))
no_sex_tab = as.table(as.matrix(no_sex_tab))
prop.test(no_sex_tab, alternative = 'less')
## men only
no_sex_tab = subset(ysex2_df, year %in% c(2008, 2018) &
variable == 'no_partners' &
sex == 'MALE',
select = c(`TRUE`, `FALSE`))
no_sex_tab = as.table(as.matrix(no_sex_tab))
prop.test(no_sex_tab, alternative = 'less')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment