Skip to content

Instantly share code, notes, and snippets.

@acbass49
Last active May 8, 2025 23:58
Show Gist options
  • Select an option

  • Save acbass49/6cda44b7ab4fab8ff58c182b75621084 to your computer and use it in GitHub Desktop.

Select an option

Save acbass49/6cda44b7ab4fab8ff58c182b75621084 to your computer and use it in GitHub Desktop.
7 2024 State Growth Update
import pandas as pd
import numpy as np
import survey_tools as st
import plotly.express as px
# Read both state-level snapshots directly from the scraper repository.
# The date in each file name is presumably the day the snapshot was scraped.
df_2024, df_2025 = map(
    pd.read_csv,
    (
        "https://raw.githubusercontent.com/acbass49/scrape_lds_stats/refs/heads/main/data/state-2024-07-23.csv",
        "https://raw.githubusercontent.com/acbass49/scrape_lds_stats/refs/heads/main/data/state-2025-04-27.csv",
    ),
)
# Region membership written region-first (50 states plus DC), then inverted
# into the abbreviation -> region lookup used by the rest of the script.
_states_by_region = {
    "West": (
        "WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AK", "HI",
        "NM", "AZ",
    ),
    "South": (
        "TX", "OK", "AR", "LA", "MS", "AL", "TN", "KY", "WV", "VA", "NC",
        "SC", "GA", "FL", "DC", "DE", "MD",
    ),
    "Northeast": (
        "ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA",
    ),
    "Midwest": (
        "ND", "SD", "NE", "KS", "MN", "IA", "MO", "WI", "IL", "IN", "MI",
        "OH",
    ),
}
state_to_region = {
    abbr: region
    for region, members in _states_by_region.items()
    for abbr in members
}
# Postal abbreviation -> display name for states, DC and inhabited territories.
# NOTE(review): "Guam GU" and "Puerto Rico PR" carry a trailing code; this looks
# deliberate, presumably to match how the source data spells them - confirm.
abbreviation_to_name = dict(
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#States.
    AK="Alaska",
    AL="Alabama",
    AR="Arkansas",
    AZ="Arizona",
    CA="California",
    CO="Colorado",
    CT="Connecticut",
    DE="Delaware",
    FL="Florida",
    GA="Georgia",
    HI="Hawaii",
    IA="Iowa",
    ID="Idaho",
    IL="Illinois",
    IN="Indiana",
    KS="Kansas",
    KY="Kentucky",
    LA="Louisiana",
    MA="Massachusetts",
    MD="Maryland",
    ME="Maine",
    MI="Michigan",
    MN="Minnesota",
    MO="Missouri",
    MS="Mississippi",
    MT="Montana",
    NC="North Carolina",
    ND="North Dakota",
    NE="Nebraska",
    NH="New Hampshire",
    NJ="New Jersey",
    NM="New Mexico",
    NV="Nevada",
    NY="New York",
    OH="Ohio",
    OK="Oklahoma",
    OR="Oregon",
    PA="Pennsylvania",
    RI="Rhode Island",
    SC="South Carolina",
    SD="South Dakota",
    TN="Tennessee",
    TX="Texas",
    UT="Utah",
    VA="Virginia",
    VT="Vermont",
    WA="Washington",
    WI="Wisconsin",
    WV="West Virginia",
    WY="Wyoming",
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#Federal_district.
    DC="District of Columbia",
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#Inhabited_territories.
    AS="American Samoa",
    GU="Guam GU",
    MP="Northern Mariana Islands",
    PR="Puerto Rico PR",
    VI="U.S. Virgin Islands",
)
# Reverse lookup (display name -> abbreviation), in the same entry order.
name_to_abbreviation = dict(
    zip(abbreviation_to_name.values(), abbreviation_to_name.keys())
)
# Stamp each snapshot with its year. The years are hard-coded here; nothing is
# parsed out of the file names.
for snapshot, snapshot_year in ((df_2024, 2024), (df_2025, 2025)):
    snapshot['Year'] = snapshot_year
# Stack the two snapshots into one long frame.
df_combined = pd.concat((df_2024, df_2025), ignore_index=True)
# One row per Name, one column per Year, holding summed membership.
membership_totals = df_combined.groupby(['Name', 'Year'])['TotalChurchMembership'].sum()
df_grouped = membership_totals.unstack()
# Relative change between the 2024 and 2025 columns, in percent.
membership_delta = df_grouped[2025] - df_grouped[2024]
df_grouped['Percent Change'] = (membership_delta / df_grouped[2024]) * 100
# Turn 'Name' back into a regular column.
df_result = df_grouped.reset_index()
# Swap full names for postal abbreviations; unknown names pass through as-is.
df_result['Name'] = [
    name_to_abbreviation.get(full_name, full_name)
    for full_name in df_result['Name']
]
# Attach the region; abbreviations without a region keep their own value.
df_result['region'] = [
    state_to_region.get(abbr, abbr)
    for abbr in df_result['Name']
]
# Keep a copy of the membership table for the later join with congregations.
df_result0 = df_result.copy()
# Colour stops as (position, colour) pairs: red at the bottom of the colour
# range, green at the top.
# NOTE(review): yellow sits at 10% of the colour range rather than at a fixed
# data value; presumably tuned so it lands near 0% change for this data - confirm.
color_scale = list(zip((0, 0.1, 1), ('red', 'yellow', 'green')))
# State-level map of the membership percent change.
# NOTE(review): the data columns are labelled 2024/2025 while the title says
# 2023 to 2024; presumably each snapshot reports the prior year-end - confirm.
fig = px.choropleth(
    data_frame=df_result,
    locations='Name',  # column of state abbreviations
    locationmode="USA-states",
    color='Percent Change',
    color_continuous_scale=color_scale,
    scope="usa",  # limit the map to the USA
    title='Percent Change in Total Church Membership By State (2023 to 2024)',
)
# Page background, font and a credit caption.
fig.update_layout(
    paper_bgcolor="#f5f5f5",
    font={"family": "Cairo"},
    annotations=[
        {
            "text": " @mormon_metrics<br>data:churchofjesuschrist.org",
            "x": 1,  # with xanchor="right" this right-aligns the caption
            "y": -0.15,  # below the chart
            "showarrow": False,
            "xanchor": "right",
            "font": {"size": 12, "color": "black"},
        }
    ],
)
# Display the figure, then export it as a PNG.
fig.show()
fig.write_image("../images/6_state_change_1.png")
### Congregations: change between the two snapshots ###
# One row per Name, one column per Year, holding summed congregation counts.
df_grouped = df_combined.groupby(['Name', 'Year'])['Congregations'].sum().unstack()
# Absolute difference in congregations (a count, not a percentage).
df_grouped['Congregation Change'] = df_grouped[2025] - df_grouped[2024]
# Turn 'Name' back into a regular column.
df_result = df_grouped.reset_index()
# Swap full names for postal abbreviations; unknown names pass through as-is.
df_result['Name'] = [
    name_to_abbreviation.get(full_name, full_name)
    for full_name in df_result['Name']
]
# Separate copy for the continuous map, with the change capped to [-30, 30]
# so a few extreme states do not dominate the colour range.
df_result2 = df_result.copy()
df_result2['Congregation Change'] = df_result2['Congregation Change'].clip(lower=-30, upper=30)
# Diverging colour ramp for the congregation map: red at the bottom of the
# colour range, yellow at its midpoint, green at the top.
color_scale = [
    (0, 'red'),       # largest decrease
    (0.5, 'yellow'),  # midpoint of the colour range
    (1, 'green')      # largest increase
]
# State-level map of the (clipped) change in congregations.
fig = px.choropleth(
    df_result2,
    locations='Name',  # column of state abbreviations
    locationmode="USA-states",
    color='Congregation Change',
    color_continuous_scale=color_scale,
    # Pin the colour axis to the same +/-30 bounds the values were clipped to.
    # Without this, plotly auto-scales to the observed min/max, so the yellow
    # midpoint only means "no change" when the data happen to be symmetric.
    range_color=(-30, 30),
    scope="usa",  # limit the map to the USA
    title='Change in Congregations By State (2023 to 2024)'
)
# Page background, font and a credit caption.
fig.update_layout(
    paper_bgcolor="#f5f5f5",
    font=dict(family="Cairo"),
    annotations=[
        dict(
            text=" @mormon_metrics<br>data:churchofjesuschrist.org",
            x=1,  # with xanchor="right" this right-aligns the caption
            y=-0.15,  # below the chart
            showarrow=False,
            xanchor="right",
            font=dict(size=12, color="black")
        )
    ]
)
# Display the figure, then export it as a PNG.
fig.show()
fig.write_image("../images/6_state_change_2.png")
# Bucket the unclipped congregation change into three labelled groups.
# pd.cut's default right-closed bins give: <= -1 -> Decrease,
# (-1, 0] -> No Change, > 0 -> Increase.
change_bin_edges = [-float('inf'), -1, 0, float('inf')]
change_bin_labels = ['Decrease', 'No Change', 'Increase']
df_result['Category'] = pd.cut(
    df_result['Congregation Change'],
    bins=change_bin_edges,
    labels=change_bin_labels,
)
# One fixed colour per category.
color_discrete_map = dict(zip(change_bin_labels, ('red', 'yellow', 'green')))
# State-level map coloured by category rather than by magnitude.
fig = px.choropleth(
    data_frame=df_result,
    locations='Name',  # column of state abbreviations
    locationmode="USA-states",
    color='Category',
    color_discrete_map=color_discrete_map,
    scope="usa",
    title='Categorical Change in Congregations By State (2023 to 2024)',
)
# Page background, font and a credit caption.
fig.update_layout(
    paper_bgcolor="#f5f5f5",
    font={"family": "Cairo"},
    annotations=[
        {
            "text": "@mormon_metrics<br>data:churchofjesuschrist.org",
            "x": 1,  # with xanchor="right" this right-aligns the caption
            "y": -0.15,  # below the chart
            "showarrow": False,
            "xanchor": "right",
            "font": {"size": 12, "color": "black"},
        }
    ],
)
# Display the figure, then export it as a PNG.
fig.show()
fig.write_image("../images/6_state_change_3.png")
# Give the per-year membership columns descriptive names before joining.
df_result0 = df_result0.rename(columns={2024: 'members_2024', 2025: 'members_2025'})
# Join congregation change with the membership figures, one row per state;
# any missing values become 0.
membership_columns = ['Name', 'Percent Change', 'members_2024', 'members_2025', 'region']
df_top_n_bot = (
    df_result[['Name', 'Congregation Change']]
    .merge(df_result0[membership_columns], on='Name')
    .fillna(0)
)
# Percentile rank (0-100) on each measure, then their simple average.
cong_pct = df_top_n_bot['Congregation Change'].rank(pct=True) * 100
memb_pct = df_top_n_bot['Percent Change'].rank(pct=True) * 100
df_top_n_bot['cong_percentiles'] = cong_pct
df_top_n_bot['memb_percentiles'] = memb_pct
df_top_n_bot['combinded_percentiles'] = (
    df_top_n_bot['cong_percentiles'] + df_top_n_bot['memb_percentiles']
) / 2
ranking_columns = [
    'Name', 'Congregation Change', 'members_2024', 'members_2025',
    'Percent Change', 'combinded_percentiles',
]
# Ten highest-ranked states by combined percentile.
(
    df_top_n_bot
    .sort_values('combinded_percentiles', ascending=False)
    [ranking_columns]
    .head(10)
    .to_csv('~/Desktop/Top_10_Countries.csv', index=False)
)
# Ten lowest-ranked states by combined percentile.
(
    df_top_n_bot
    .sort_values('combinded_percentiles', ascending=True)
    [ranking_columns]
    .head(10)
    .to_csv('~/Desktop/Bottom_10_Countries.csv', index=False)
)
# Ten states with the largest absolute gain in members, plus each state's
# share (in percent, one decimal) of the net change across all rows.
df_top_n_bot['diff'] = df_top_n_bot['members_2025'] - df_top_n_bot['members_2024']
net_member_change = df_top_n_bot['diff'].sum()
df_top_n_bot['part_of_total'] = (df_top_n_bot['diff'] / net_member_change * 100).round(1)
people_added_columns = [
    'Name', 'members_2024', 'members_2025', 'Percent Change', 'diff', 'part_of_total',
]
(
    df_top_n_bot[people_added_columns]
    .sort_values('diff', ascending=False)
    .head(10)
    .to_csv('~/Desktop/Top_10_Countries_by_people_added.csv', index=False)
)
def _region_share(column):
    """Sum *column* per region and add each region's fraction of the total."""
    return (
        df_top_n_bot
        .groupby('region')
        .agg({column: 'sum'})
        .reset_index()
        .assign(percent_of_total=lambda x: x[column] / x[column].sum())
    )


# Both summaries are bare expressions: the results are not stored, so they are
# only visible when the script is run interactively.
# Net member change by region.
_region_share('diff')
# Membership in the 2025 snapshot by region.
_region_share('members_2025')
# Treemap of the states that gained members, nested inside their region.
region_colors = {
    "West": "blue",
    "South": "green",
    "Northeast": "orange",
    "Midwest": "purple",
}
growing_states = df_top_n_bot.query('diff > 0')  # positive changes only
fig = px.treemap(
    data_frame=growing_states,
    path=['region', 'Name'],  # region first, then state
    values='part_of_total',  # box size = share of the net member change
    color='region',
    color_discrete_map=region_colors,
    title="Total Membership Change as Proportion by State and Region (2023 to 2024)",
)
# Font, background, margins and a credit caption.
fig.update_layout(
    font={"family": "Cairo", "size": 14},
    paper_bgcolor="#f5f5f5",
    margin={"t": 50, "l": 25, "r": 25, "b": 40},
    annotations=[
        {
            "text": "@mormon_metrics<br>data:churchofjesuschrist.org",
            "x": 1,  # with xanchor="right" this right-aligns the caption
            "y": -0.1,  # below the chart
            "showarrow": False,
            "xanchor": "right",
            "font": {"size": 12, "color": "black"},
        }
    ],
)
# Label each box with its name and its 'part_of_total' value.
fig.data[0].textinfo = 'label+value'
# Display the treemap.
fig.show()
# Treemap of each state's share of membership in the 2025 snapshot, nested
# inside its region.
share_of_members = (
    df_top_n_bot['members_2025'] / df_top_n_bot['members_2025'].sum() * 100
).round(1)
membership_share_frame = df_top_n_bot.assign(percent_of_total=share_of_members)
region_colors = {
    "West": "blue",
    "South": "green",
    "Northeast": "orange",
    "Midwest": "purple",
}
fig = px.treemap(
    data_frame=membership_share_frame,
    path=['region', 'Name'],  # region first, then state
    values='percent_of_total',  # box size = percent of all members
    color='region',
    color_discrete_map=region_colors,
    title="Total Membership as a Proportion by State and Region in 2024",
)
# Font, background, margins and a credit caption.
fig.update_layout(
    font={"family": "Cairo", "size": 14},
    paper_bgcolor="#f5f5f5",
    margin={"t": 50, "l": 25, "r": 25, "b": 40},
    annotations=[
        {
            "text": "@mormon_metrics<br>data:churchofjesuschrist.org",
            "x": 1,  # with xanchor="right" this right-aligns the caption
            "y": -0.1,  # below the chart
            "showarrow": False,
            "xanchor": "right",
            "font": {"size": 12, "color": "black"},
        }
    ],
)
# Label each box with its name and its 'percent_of_total' value.
fig.data[0].textinfo = 'label+value'
# Display the treemap.
fig.show()
# Bring in the CES survey files to track the share of self-identified members
# over time in three states.
data = pd.read_stata('../data/CES/CES24_Common.dta')
old_data = pd.read_stata('../data/CES/cumulative_2006-2023.dta')
# Result is discarded; presumably an interactive lookup of the columns
# matching "state" - confirm against survey_tools.
st.get_names(data, "state")
jello_belt_states = ['Idaho', 'Utah', 'Arizona']
# 2024 file: copy 'inputstate' to 'state', flag the three states and the
# Mormon respondents, stamp the year, then keep only the flagged rows.
data['state'] = data['inputstate']
data['jellobelt'] = data['state'].isin(jello_belt_states)
data['Mormon'] = data['religpew'].isin(['Mormon'])
data['year'] = 2024
data = data.query('jellobelt == True')
# Cumulative file: same flags, but religion lives in the 'religion' column.
old_data['jellobelt'] = old_data['state'].isin(jello_belt_states)
old_data['Mormon'] = old_data['religion'].isin(['Mormon'])
old_data = old_data.query('jellobelt == True')
# Bare expression: nothing is stored; it only shows the column when run
# interactively.
old_data['year']
# Stack both sources, keeping only the columns they share.
shared_columns = ['state', 'Mormon', 'year']
final_data = pd.concat(
    [data[shared_columns], old_data[shared_columns]],
    ignore_index=True,
)
# For each state, tabulate 'Mormon' against 'year', sort the result by index
# in descending order, keep the first row and write it to the Desktop.
# NOTE(review): st.tabs is presumably a crosstab with column percentages -
# confirm against survey_tools.
for state_name in ('Idaho', 'Arizona', 'Utah'):
    state_rows = final_data.query(f'state == "{state_name}"')
    (
        st.tabs(state_rows, 'Mormon', 'year', display='column')
        .sort_index(ascending=False)
        .head(1)
        .to_csv(f'~/Desktop/{state_name}.csv', index=False)
    )
#gist https://gist.github.com/acbass49/6cda44b7ab4fab8ff58c182b75621084
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment