Skip to content

Instantly share code, notes, and snippets.

@acbass49
Created April 15, 2025 05:28
Show Gist options
  • Select an option

  • Save acbass49/45efa47f9c60effbe7c320ea5d232c57 to your computer and use it in GitHub Desktop.

Select an option

Save acbass49/45efa47f9c60effbe7c320ea5d232c57 to your computer and use it in GitHub Desktop.
5 Election Mormon Scorecards
import pandas as pd
import numpy as np
import survey_tools as st
# Load the data: the 2024 CES common-content file and the 2006-2023 cumulative file.
data = pd.read_stata('../data/CES/CES24_Common.dta')
old_data = pd.read_stata('../data/CES/cumulative_2006-2023.dta')

# How many mormons are in the data set?
st.tabs(data, "religpew")
# There are 623 Mormons in the 2024 CES data set

# variables to look at:
# gender ✅
# race ✅
# age ✅
# education ✅
# region ✅
# church attendance ✅
# religion importance ✅
# Gender X Age ✅
# Race X Education ✅

# First, I need to recode the data and join it

# Single-variable demographics used as scorecard rows.
_single_demos = [
    'gender', 'educ_cat', 'age_cat', 'Utah', 'race_cat', 'attend',
    'relig_imp_rc', 'over_under_45', 'white', 'has_child', 'marstat_cat',
    'inc', 'educ_cat2',
]
# Two-way crossings of the demographics above.
_crossed_demos = [
    'genderXover_under_45', 'educ_cat2Xwhite', 'educ_cat2Xhas_child',
    'genderXhas_child', 'marstat_catXgender', 'age_catXgender',
]
# NOTE: `vars` shadows the builtin of the same name; the name is kept because
# the rest of the script refers to it.
vars = _single_demos + _crossed_demos

# add parents, parentXgender, add weights to overall scorecard, profession/homemaker,
# marital status.

# One presidential-vote column per election covered by the scorecard.
dvs = ['vote_08', 'vote_12', 'vote_16', 'vote_20', 'vote_24']

# Bookkeeping columns carried along with the demographics and votes.
other_vars = ['year', 'mormon', 'weight']
# Harmonise the demographic columns of the 2024 file (`data`) and the
# cumulative file (`old_data`) so both end up with the same column names
# and the same category labels.
# NOTE(review): every np.where(...isin(...)) below sends all non-matching
# values, missing ones included, to the second label - confirm that is intended.

# Marital status: fold the smaller categories into 'Other' and align the
# cumulative file's 'Single / Never Married' label with the 2024 label.
data['marstat_cat'] = data.marstat.replace({'Separated':'Other', 'Widowed':'Other', 'Domestic / civil partnership':'Other'})
old_data['marstat_cat'] = old_data.marstat.replace({'Separated':'Other', 'Widowed':'Other', 'Domestic Partnership':'Other', 'Single / Never Married':'Never married'})

# Columns the 2024 file needs under the names used for the combined data.
data['year'] = 2024
old_data['mormon'] = np.where(old_data.religion == "Mormon", "Mormon", "Not Mormon")
data['mormon'] = np.where(data.religpew == "Mormon", "Mormon", "Not Mormon")
data['has_child'] = data.child18
data['weight'] = data.commonweight

# Gender
st.tabs(data[data.religpew == 'Mormon'], "gender4", display="count")
# I'm going to drop the 2 non-binary and 2 other responses to the gender question
data['gender'] = st.recode(data, "gender4", "'Man' = 'Male';'Woman'='Female'; 'Non-binary' = NaN; 'Other' = NaN")

# Race: collapse the smaller groups into 'Other'.
data['race_cat'] = st.recode(data, 'race', "'Asian' = 'Other'; 'Native American' = 'Other'; 'Mixed' = 'Other'; 'Middle Eastern' = 'Other'; 'Two or more races' = 'Other'")
old_data['race_cat'] = st.recode(old_data, 'race',"'Asian' = 'Other'; 'Native American' = 'Other'; 'Mixed' = 'Other'; 'Middle Eastern' = 'Other'; 'Two or more races' = 'Other'")

# Age: the 2024 file only has birth year, so derive age as of 2024 first.
data["age"] = 2024 - data.birthyr
data["age_cat"] = st.recode(data, "age", "18:30='18:30'; 31:45='31:45'; 46:64='46:64'; 65:110='65+'")
old_data["age_cat"] = st.recode(old_data, "age", "18:30='18:30'; 31:45='31:45'; 46:64='46:64'; 65:110='65+'")

# Education
old_data["educ_cat"] = st.recode(old_data, "educ", "'No HS' = 'HS or Less'; 'High School Graduate' = 'HS or Less'; '2-Year' = 'Some College'")
data["educ_cat"] = st.recode(data, "educ", "'No HS' = 'HS or Less'; 'High school graduate' = 'HS or Less'; '2-year' = 'Some college'")
# Bring the 2024 labels in line with the cumulative file's capitalisation.
# Assign the result back instead of calling replace(inplace=True) on
# `data.educ_cat`: that form is chained assignment and is not guaranteed to
# write through to the frame.
data["educ_cat"] = data["educ_cat"].replace({
    'Some college': 'Some College',
    'Post-grad': 'Post-Grad',
    '4-year': '4-Year',
})

# Region, church attendance and importance of religion as two-level factors.
data['Utah'] = np.where(data.inputstate == "Utah", "Utah", "Not Utah")
old_data['Utah'] = np.where(old_data.state == "Utah", "Utah", "Not Utah")
data['attend'] = np.where(data.pew_churatd.isin(['More than once a week', 'Once a week']), "Weekly", "Less than weekly")
old_data['attend'] = np.where(old_data.relig_church.isin(['More Than Once a Week', 'Once a Week']), "Weekly", "Less than weekly")
data['relig_imp_rc'] = np.where(data.pew_religimp.isin(['Very important', 'Somewhat important']), "Important", "Not Important")
old_data['relig_imp_rc'] = np.where(old_data.relig_imp.isin(['Very Important', 'Somewhat Important']), "Important", "Not Important")

# Age split at 45, then crossed with gender.
data['over_under_45'] = np.where(data["age_cat"].isin(['18:30','31:45']), "Under 45", "Over 45")
old_data['over_under_45'] = np.where(old_data["age_cat"].isin(['18:30','31:45']), "Under 45", "Over 45")
data = st.make_interaction(data, 'gender', 'over_under_45')
old_data = st.make_interaction(old_data, 'gender', 'over_under_45')

# White / not white.
data['white'] = np.where(data['race_cat'].isin(['White']), "White", "Not White")
old_data['white'] = np.where(old_data['race_cat'].isin(['White']), "White", "Not White")

# Two-level education. The 2024 labels were normalised to 'Some College'
# above, so both files must test for that spelling; testing for
# 'Some college' here would push 2024 some-college respondents into
# "Bachelors or More".
data['educ_cat2'] = np.where(data['educ_cat'].isin(['HS or Less', 'Some College']), "Less Than Bachelors", "Bachelors or More")
old_data['educ_cat2'] = np.where(old_data['educ_cat'].isin(['HS or Less', 'Some College']), "Less Than Bachelors", "Bachelors or More")
# Collapse family income into three brackets, keyed on the integer codes of
# each categorical column (-1 is the code pandas assigns to a missing value).
# NOTE(review): the trailing codes mapped to NaN (16 in the 2024 file, 12 and
# 13 in the cumulative file) are presumably non-substantive answers - verify
# against the codebooks.
_INC_LOW, _INC_MID, _INC_HIGH = '<$50k', '$50k-$100k', '>$100k'

_inc_codes_2024 = {
    **dict.fromkeys(range(0, 5), _INC_LOW),
    **dict.fromkeys(range(5, 9), _INC_MID),
    **dict.fromkeys(range(9, 16), _INC_HIGH),
    **dict.fromkeys((16, -1), np.nan),
}
data['inc'] = data.faminc_new.cat.codes.replace(_inc_codes_2024)

_inc_codes_cumulative = {
    **dict.fromkeys(range(0, 5), _INC_LOW),
    **dict.fromkeys(range(5, 9), _INC_MID),
    **dict.fromkeys(range(9, 12), _INC_HIGH),
    **dict.fromkeys((12, 13, -1), np.nan),
}
old_data['inc'] = old_data.faminc.cat.codes.replace(_inc_codes_cumulative)
# Build the remaining crossed demographics on both files, one pair at a time.
# Presumably each call adds the '<first>X<second>' column named in `vars` -
# confirm against survey_tools.make_interaction.
for _first, _second in (
    ('educ_cat2', 'white'),
    ('educ_cat2', 'has_child'),
    ('gender', 'has_child'),
    ('marstat_cat', 'gender'),
    ('age_cat', 'gender'),
):
    data = st.make_interaction(data, _first, _second)
    old_data = st.make_interaction(old_data, _first, _second)
# Now I need to recode the election variables.
# Each past election's vote is kept only for respondents whose survey year is
# the election year or the year after, then collapsed into
# Democrat / Republican / Other. The 2024 file has no earlier votes and the
# cumulative file has no 2024 vote, so those columns are filled with NaN to
# let the two frames be stacked.

# 2008
old_data.loc[old_data.voted_pres_08 == "Not Sure / Don't Recall", "voted_pres_08"] = np.nan
old_data.loc[~old_data.year.isin([2008,2009]), "voted_pres_08"] = np.nan
old_data['vote_08'] = st.recode(old_data, "voted_pres_08", ''' 'Barack Obama' = 'Democrat'; 'John McCain' = 'Republican'; 'Other / Someone Else' = 'Other'; 'Did not Vote' = NaN; 'Did not Vote for this Office' = NaN ''')
data['vote_08'] = np.nan

# 2012
old_data.loc[old_data.voted_pres_12 == "Not Sure / Don't Recall", "voted_pres_12"] = np.nan
old_data.loc[~old_data.year.isin([2012,2013]), "voted_pres_12"] = np.nan
old_data['vote_12'] = st.recode(old_data, "voted_pres_12", ''' 'Barack Obama' = 'Democrat'; 'Mitt Romney' = 'Republican'; 'Other / Someone Else' = 'Other'; 'Did not Vote' = NaN; 'Did not Vote for this Office' = NaN ''')
data['vote_12'] = np.nan

# 2016
# Blank only the voted_pres_16 cell, as for 2008 and 2012. Assigning NaN to
# the row mask without a column label would wipe every column of those
# respondents (year, weight, demographics and their other vote answers).
old_data.loc[old_data.voted_pres_16 == "Not Sure / Don't Recall", "voted_pres_16"] = np.nan
old_data.loc[~old_data.year.isin([2016,2017]), "voted_pres_16"] = np.nan
# NOTE(review): 'Did not Vote' is sent to NaN for 2008/2012 but not here -
# confirm whether that level occurs in voted_pres_16.
old_data['vote_16'] = st.recode(old_data, "voted_pres_16", ''' 'Hilary Clinton' = 'Democrat'; 'Donald Trump' = 'Republican'; 'Other / Someone Else' = 'Other'; 'Not Sure / Don\'t Recall' = NaN; 'Did not Vote for this Office' = NaN ''')
data['vote_16'] = np.nan

# 2020
old_data.loc[~old_data.year.isin([2020,2021]), "voted_pres_20"] = np.nan
old_data['vote_20'] = st.recode(old_data, "voted_pres_20", ''' 'Joe Biden' = 'Democrat'; 'Donald Trump' = 'Republican'; 'Other / Someone Else' = 'Other'; 'Not Sure' = NaN; 'Did not Vote for President' = NaN ''')
data['vote_20'] = np.nan

# 2024 (the opening quote on 'Jill Stein' was missing from the recode spec)
data['vote_24'] = st.recode(data, "CC24_410", "'Kamala Harris' = 'Democrat'; 'Donald Trump' = 'Republican'; 'Robert F. Kennedy, Jr.' = 'Other'; 'Jill Stein' = 'Other'; 'Cornel West' = 'Other'; 'Chase Oliver' = 'Other'; 'Did not vote for President' = NaN")
old_data['vote_24'] = np.nan

# Stack both files on the shared columns and add a constant 'Overall'
# grouping so the scorecard gets a total row.
combined_data = pd.concat([data[vars + dvs + other_vars], old_data[vars + dvs + other_vars]], axis=0)
combined_data['Overall'] = "Overall"
vars = ['Overall'] + vars
# Let's do the scorecard for one demographic
def one_demo(data, demo, dvs, weight = None):
    """Build the scorecard rows for one demographic variable.

    For every level of ``demo`` and every vote column in ``dvs``, tabulate the
    vote column within that level using ``st.tabs(..., display='column')`` and
    record the 'Republican' entry minus the 'Democrat' entry.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding ``demo``, the ``dvs`` columns and, when given, the
        ``weight`` column. It is not modified.
    demo : str
        Name of the demographic column whose levels become rows.
    dvs : list of str
        Names of the vote columns; each becomes one output column.
    weight : str, optional
        Name of the weight column, passed to ``st.tabs`` as ``wts``. When
        omitted the tabulation is called without weights.

    Returns
    -------
    pandas.DataFrame
        One row per level of ``demo`` with columns 'demo', 'name' and one
        column per entry of ``dvs``. A cell is NaN when the tabulation for
        that level lacks either a 'Republican' or a 'Democrat' row.
    """
    # Take the levels from the frame that was passed in rather than from the
    # module-level combined_data, and without converting the caller's column.
    levels = data[demo].astype('category').cat.categories.to_list()

    # Only pass `wts` when a weight column was requested.
    tab_kwargs = {'display': 'column'}
    if weight:
        tab_kwargs['wts'] = weight

    rows = []
    for cat in levels:
        # A boolean mask avoids building a query string, which would break on
        # labels that contain a quote character.
        subset = data[data[demo] == cat]
        row = {'demo': demo, 'name': cat}
        for dv in dvs:
            tmp_tab = st.tabs(subset, dv, **tab_kwargs)
            labels = tmp_tab.index.to_list()
            if 'Republican' in labels and 'Democrat' in labels:
                rep_idx = labels.index("Republican")
                dem_idx = labels.index("Democrat")
                row[dv] = tmp_tab.iloc[rep_idx] - tmp_tab.iloc[dem_idx]
            else:
                row[dv] = np.nan
        rows.append(row)
    return pd.DataFrame(rows)
# Now I can run the scorecard for each demographic

# General population: every respondent, tabulated with the 'weight' column.
_genpop_parts = [
    one_demo(combined_data, demo_name, dvs, weight = 'weight')
    for demo_name in vars
]
genpop_scorecard = pd.concat([pd.DataFrame(), *_genpop_parts], axis=0)
genpop_scorecard = genpop_scorecard.round(2)
genpop_scorecard

# Mormon respondents only, tabulated without weights.
_mormon_parts = [
    one_demo(combined_data.query("mormon == 'Mormon'"), demo_name, dvs)
    for demo_name in vars
]
mormon_scorecard = pd.concat([pd.DataFrame(), *_mormon_parts], axis=0)
mormon_scorecard = mormon_scorecard.round(2)
mormon_scorecard

# Write the scorecards to excel in different tabs
with pd.ExcelWriter('../data/CES/voting_scorecard.xlsx') as writer:
    genpop_scorecard.to_excel(writer, sheet_name='genpop_scorecard', index=False)
    mormon_scorecard.to_excel(writer, sheet_name='mormon_scorecard', index=False)
# what are the stories?
# 1. Young Mormons went way left - away from Trump
# 2. Strong Education Gap with More educated Mormons going left
# 3. Non-white Mormons are swinging right to Trump - Now a non-white Mormon is
# more likely to have voted for Trump than a white Mormon
# 4. Starting to be a voting gender divide among single members

# Vote columns that live in the cumulative file; vote_24 comes from `data`.
_past_votes = ("vote_08", "vote_12", "vote_16", "vote_20")

# Weighted topline for each election, all respondents.
for _dv in _past_votes:
    st.tabs(old_data, _dv, wts='weight', display="column")
st.tabs(data, "vote_24", wts='weight', display="column")

# Unweighted topline for each election, Mormon respondents only.
for _dv in _past_votes:
    st.tabs(old_data[old_data.religion == "Mormon"], _dv, display="column")
st.tabs(data[data.religpew == "Mormon"], "vote_24", display="column")
#gist: https://gist.github.com/acbass49/2aa740c19a393437c68ad1bfb3b55271
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment