Skip to content

Instantly share code, notes, and snippets.

@acbass49
Last active May 29, 2025 10:33
Show Gist options
  • Select an option

  • Save acbass49/783c51e1de8e82ee382a26d2b737d2e1 to your computer and use it in GitHub Desktop.

Select an option

Save acbass49/783c51e1de8e82ee382a26d2b737d2e1 to your computer and use it in GitHub Desktop.
10 Vaccines Elections And Conspiracies
import pandas as pd
import numpy as np
import survey_tools as st
from statsmodels.stats.proportion import proportions_ztest
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
# Read the vaccine survey waves from SPSS files. Waves 1 and 3 carry the
# religion / born-again items as RELI1 / BORN, so they are renamed to Q22 / Q23
# (presumably the names wave 2 already uses -- confirm against the .sav files).
wave_renames = {'RELI1':'Q22', 'BORN': 'Q23'}
data1 = pd.read_spss('../data/vaccine/vaccine1.sav').rename(columns=wave_renames)
data2 = pd.read_spss('../data/vaccine/vaccine2.sav')
data3 = pd.read_spss('../data/vaccine/vaccine3.sav').rename(columns=wave_renames)

# Columns carried into the combined wave 1 + wave 2 frame.
vars_to_keep = [
    'Q4',
    'Q9_1', 'Q9_2', 'Q9_3', 'Q9_4', 'Q9_5',
    'Q9_6', 'Q9_7', 'Q9_8', 'Q9_9', 'Q9_10',
    'Q22', 'Q23', 'WEIGHT',
    'QPID100',
]

# Give wave 2 all-missing versions of these items so that selecting
# `vars_to_keep` from it below does not fail.
for var in ['Q9_4', 'Q9_6', 'Q9_7', 'Q9_8', 'Q9_9', 'Q9_10']:
    data2[var] = np.nan

# Stack waves 1 and 2 with a fresh 0..n-1 index.
data = pd.concat(
    [data1[vars_to_keep], data2[vars_to_keep]],
    ignore_index=True,
)

# Interactive check of the Mormon sample size:
#   214 Mormons in waves 1 & 2
#   105 Mormons in wave 1
data['Q22'].eq('Mormon').sum()
def quick_recode(data, var, keep_categories):
    """Add a binary indicator column ``<var>_rc`` to *data* and return it.

    The new column is 1.0 where ``data[var]`` is one of *keep_categories*,
    0.0 for any other answer, and NaN where the answer is missing or the
    respondent skipped/refused.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the column to recode. It is modified in place.
    var : str
        Name of the column to recode.
    keep_categories : list
        Response labels that should be coded as 1.

    Returns
    -------
    pandas.DataFrame
        The same *data* object, with the ``<var>_rc`` column added.
    """
    # Both spellings of the refusal label are treated as missing: this helper
    # can be called before 'Skipped' has been relabelled to 'Skipped/Refused',
    # and a skipped item must not be counted as disagreement.
    missing_labels = ['Skipped', 'Skipped/Refused']

    new_var = var + '_rc'
    responses = data[var]

    # Build the indicator as float from the start so NaN can be written into
    # it without an int -> float upcast on assignment.
    data[new_var] = np.where(responses.isin(keep_categories), 1.0, 0.0)
    is_missing = responses.isna() | responses.isin(missing_labels)
    data.loc[is_missing, new_var] = np.nan
    return data
# Recode religion (case_when style): split Protestants by the born-again item
# and turn 'Skipped' into missing; every other label is kept unchanged.
protestant_label = 'Evangelical or Protestant Christian (Baptist, Lutheran, Methodist, Presbyterian, Episcopalian, Pentecostal, Church of Ch'
is_protestant = data['Q22'] == protestant_label

conditions = [
    is_protestant & (data['Q23'] == 'Yes'),   # Evangelical: Protestant AND born again
    is_protestant & (data['Q23'] != 'Yes'),   # Mainline: Protestant AND NOT born again
    data['Q22'] == 'Skipped',
]
choices = ['Evangelical Protestant', 'Mainline Protestant', np.nan]

# Rows matching none of the conditions fall through to the original label.
data['Q22_rc'] = np.select(conditions, choices, default=data['Q22'])

# Fold categories with fewer than 100 cases into the two "Other" buckets.
small_christian = ["Jehovah's Witness", "Greek or Russian Orthodox"]
small_non_christian = ["Unitarian (Universalist)", "Buddhist", "Hindu", "Islam/Muslim"]
collapse_map = {
    **{label: 'Other Christian religion' for label in small_christian},
    **{label: "Other non-Christian religion" for label in small_non_christian},
}
data['Q22_rc'] = data['Q22_rc'].replace(collapse_map)
# Lookup from the original Q22 religion label to a Christian / not-Christian
# flag used for the comparison groups.
is_christian = {
    'Buddhist': False,
    'Catholic': True,
    'Evangelical or Protestant Christian (Baptist, Lutheran, Methodist, Presbyterian, Episcopalian, Pentecostal, Church of Ch': True,
    'Greek or Russian Orthodox': True,
    'Hindu': False,
    'Islam/Muslim': False,
    "Jehovah's Witness": True,
    'Jewish': False,
    'Mormon': False,  # do not want to include Mormons since we are comparing them
    'No religion': False,
    'Other Christian religion': True,
    'Other non-Christian religion': False,
    'Skipped': False,
    'Unitarian (Universalist)': False,  # Historically Christian, now pluralistic
}

# Use an explicit lookup rather than Series.replace: map() yields True/False
# for known labels and NaN for anything not in the table (including missing
# Q22), instead of leaving unmapped labels in the column as strings. Casting
# to object first keeps the lookup independent of whether Q22 is categorical.
data['christian'] = data['Q22'].astype(object).map(is_christian)
# Respondents with a missing Q4 have already received the vaccine, so they are
# grouped with those who would get it as soon as they can.
data.loc[data['Q4'].isna(), 'Q4'] = 'Get the vaccine as soon as you can'

# Response scales, in display order.
agree_scale = [
    'Completely agree',
    'Mostly agree',
    'Mostly disagree',
    'Completely disagree',
    'Skipped/Refused',
]
q4_order = [
    "Get the vaccine as soon as you can",
    "Wait until it has been available for a while to see how it is working for other people",
    "Only get the vaccine if you are required to do so for work, school, or other activities",
    "Definitely not get the vaccine",
    "Skipped/Refused"
]
q9_items = ['Q9_1', 'Q9_2', 'Q9_3', 'Q9_4', 'Q9_5', 'Q9_6', 'Q9_7', 'Q9_8', 'Q9_9', 'Q9_10']

# Normalise the refusal label and apply the ordered scales BEFORE building the
# agree/disagree indicators. quick_recode only recognises 'Skipped/Refused' as
# missing, so recoding first would count raw 'Skipped' answers as disagreement.
for var in q9_items:
    data[var] = data[var].replace({'Skipped': 'Skipped/Refused'})
    data[var] = pd.Categorical(data[var], categories=agree_scale, ordered=True)
for var in ['Q4']:
    data[var] = data[var].replace({'Skipped': 'Skipped/Refused'})
    data[var] = pd.Categorical(data[var], categories=q4_order, ordered=True)

# Indicators (<item>_rc) for completely/mostly agreeing with each statement.
top_two_box = ['Completely agree', 'Mostly agree']
data = quick_recode(data, 'Q9_1', top_two_box)  # satan worshiping pedophiles
data = quick_recode(data, 'Q9_2', top_two_box)  # election was stolen
data = quick_recode(data, 'Q9_3', top_two_box)  # storm is coming
data = quick_recode(data, 'Q9_5', top_two_box)  # may need to resort to violence soon

# 1 for respondents flagged Christian who also identify as Republican, else 0.
data['religious_c'] = ((data.christian == True) & (data.QPID100 == 'Republican')).astype(int)

# Items to tabulate and the demographics to cut them by. (Named `tab_vars`
# rather than `vars` so the builtin vars() is not shadowed.)
tab_vars = ['Q4'] + q9_items
demos = ['Q22_rc', 'religious_c', 'QPID100']

# create my tabs
st.make_tabs(data, tab_vars, demos, display = 'column', to_excel = '~/Desktop/vaccine_tabs.xlsx')
st.make_tabs(data, tab_vars, ['QPID100'], display = 'column', to_excel = '~/Desktop/vaccine_tabs3.xlsx')

# Constant column so the weighted toplines can be produced through the same
# crosstab helper.
data['all'] = 1
st.make_tabs(data, tab_vars, ['all'], display = 'column', wts='WEIGHT', to_excel = '~/Desktop/vaccine_tabs2.xlsx')

# Interactive spot checks.
st.tabs(data, "Q9_2_rc", "Q22_rc", display='column')
st.tabs(data, "Q22_rc", "QPID100", display='row')
st.tabs(data, "Q9_2", "QPID100", display='column')
# Check 3rd wave: rebuild `data` from all three waves, keeping only the
# vaccine-intent, religion, born-again and weight columns.
vars_to_keep = ['Q4', 'Q22', 'Q23', 'WEIGHT']
data = pd.concat(
    [wave[vars_to_keep] for wave in (data1, data2, data3)],
    ignore_index=True,
)

# Recode religion (case_when style): split Protestants by the born-again item
# and turn 'Skipped' into missing; every other label is kept unchanged.
protestant_label = 'Evangelical or Protestant Christian (Baptist, Lutheran, Methodist, Presbyterian, Episcopalian, Pentecostal, Church of Ch'
is_protestant = data['Q22'] == protestant_label
conditions = [
    is_protestant & (data['Q23'] == 'Yes'),   # Evangelical: Protestant AND born again
    is_protestant & (data['Q23'] != 'Yes'),   # Mainline: Protestant AND NOT born again
    data['Q22'] == 'Skipped',
]
choices = ['Evangelical Protestant', 'Mainline Protestant', np.nan]
data['Q22_rc'] = np.select(conditions, choices, default=data['Q22'])

# Respondents with a missing Q4 have already received the vaccine.
already_vaccinated = data['Q4'].isna()
data.loc[already_vaccinated, 'Q4'] = 'Get the vaccine as soon as you can'

# Interactive look at the raw religion counts.
data['Q22'].value_counts()

# Display order for the vaccine-intent answers.
q4_order = [
    "Get the vaccine as soon as you can",
    "Wait until it has been available for a while to see how it is working for other people",
    "Only get the vaccine if you are required to do so for work, school, or other activities",
    "Definitely not get the vaccine",
    "Skipped/Refused",
]
q4_relabelled = data['Q4'].replace({'Skipped': 'Skipped/Refused'})
data['Q4'] = pd.Categorical(q4_relabelled, categories=q4_order, ordered=True)

# Vaccine intent by recoded religion, written to Excel.
q4_by_religion = st.tabs(data, "Q4", "Q22_rc", display='column')
q4_by_religion.to_excel('~/Desktop/vaccine_tabs_0.xlsx', index=False)

# Weighted topline for vaccine intent.
st.tabs(data, "Q4", display='column', wts='WEIGHT')
# Quick look at the CES: `data` is replaced by the CES 2024 common-content file.
data = pd.read_stata('../data/CES/CES24_Common.dta')

# Recode religion (case_when style). Protestants are split by the born-again
# item, the non-religious labels are pooled, and three smaller faiths are
# grouped as 'Something else'; all other labels are kept unchanged.
religion = data['religpew']
is_protestant = religion == 'Protestant'
conditions = [
    is_protestant & (data['pew_bornagain'] == 'Yes'),   # Evangelical: born again
    is_protestant & (data['pew_bornagain'] != 'Yes'),   # Mainline: NOT born again
    religion.isin(['Agnostic', 'Nothing in particular', 'Atheist']),
    religion.isin(['Buddhist', 'Hindu', 'Muslim']),
]
choices = [
    'Evangelical Protestant',
    'Mainline Protestant',
    'No religion',
    'Something else',
]
data['religpew_rc'] = np.select(conditions, choices, default=data['religpew'])

# Row-wise crosstab of religion by CC24_423; T2B is the sum of the
# 'Not very much' and 'None at all' columns. Groups are sorted by T2B,
# highest first, and that single column is copied to the clipboard.
religion_by_item = st.tabs(data, "religpew_rc", "CC24_423", display='row')
religion_by_item = religion_by_item.assign(
    T2B=religion_by_item['Not very much'] + religion_by_item['None at all']
)
religion_by_item.sort_values('T2B', ascending=False)[['T2B']].to_clipboard()

# Weighted topline for CC24_423.
st.tabs(data, "CC24_423", display = 'column', wts="commonweight")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment