Last active
May 8, 2025 23:58
-
-
Save acbass49/6cda44b7ab4fab8ff58c182b75621084 to your computer and use it in GitHub Desktop.
7 2024 State Growth Update
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import numpy as np | |
| import survey_tools as st | |
| import plotly.express as px | |
# Load the two state-level snapshots into pandas DataFrames. Both CSVs are
# fetched over HTTP from the scrape_lds_stats repository each time this runs.
DATA_URL = "https://raw.githubusercontent.com/acbass49/scrape_lds_stats/refs/heads/main/data"
df_2024 = pd.read_csv(f"{DATA_URL}/state-2024-07-23.csv")
df_2025 = pd.read_csv(f"{DATA_URL}/state-2025-04-27.csv")
# Postal abbreviations grouped by region (50 states plus DC). Grouping by
# region keeps each membership list easy to audit.
_STATES_BY_REGION = {
    "West": [
        "WA", "OR", "CA", "NV", "ID",
        "MT", "WY", "UT", "CO", "AK", "HI",
        "NM", "AZ",
    ],
    "South": [
        "TX", "OK", "AR", "LA", "MS",
        "AL", "TN", "KY", "WV", "VA",
        "NC", "SC", "GA", "FL", "DC",
        "DE", "MD",
    ],
    "Northeast": [
        "ME", "NH", "VT", "MA",
        "RI", "CT", "NY", "NJ",
        "PA",
    ],
    "Midwest": [
        "ND", "SD", "NE", "KS",
        "MN", "IA", "MO", "WI",
        "IL", "IN", "MI", "OH",
    ],
}

# Flat lookup: postal abbreviation -> region label.
state_to_region = {
    abbreviation: region
    for region, abbreviations in _STATES_BY_REGION.items()
    for abbreviation in abbreviations
}
# Postal abbreviation -> full name for the 50 states, the federal district
# and the inhabited territories.
abbreviation_to_name = {
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#States.
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MS": "Mississippi",
    "MT": "Montana",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#Federal_district.
    "DC": "District of Columbia",
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#Inhabited_territories.
    "AS": "American Samoa",
    # NOTE(review): "Guam GU" and "Puerto Rico PR" carry a trailing
    # abbreviation; presumably this mirrors how the source CSVs spell these
    # names -- confirm against the data before normalising.
    "GU": "Guam GU",
    "MP": "Northern Mariana Islands",
    "PR": "Puerto Rico PR",
    "VI": "U.S. Virgin Islands",
}

# Reverse lookup: full name -> postal abbreviation.
name_to_abbreviation = {
    name: abbreviation for abbreviation, name in abbreviation_to_name.items()
}
# Tag each snapshot with the year it is treated as. The year is hard-coded per
# DataFrame; it is not parsed from the file name.
df_2024['Year'] = 2024
df_2025['Year'] = 2025

# Stack both snapshots into one long table.
df_combined = pd.concat([df_2024, df_2025], ignore_index=True)

# One row per `Name`, one column per `Year`, holding the summed
# `TotalChurchMembership`.
df_grouped = (
    df_combined
    .groupby(['Name', 'Year'])['TotalChurchMembership']
    .sum()
    .unstack()
)

# Percentage change from the 2024 column to the 2025 column.
df_grouped['Percent Change'] = (df_grouped[2025] - df_grouped[2024]) / df_grouped[2024] * 100

# Make `Name` a regular column, then swap full names for postal abbreviations.
# Names missing from name_to_abbreviation pass through unchanged.
df_result = df_grouped.reset_index()
df_result['Name'] = df_result['Name'].apply(lambda x: name_to_abbreviation.get(x, x))

# Rows without a region mapping keep their `Name` value as the region.
df_result['region'] = df_result['Name'].apply(lambda x: state_to_region.get(x, x))

# Keep a copy of the membership table; `df_result` is overwritten by the
# congregation analysis and this copy is merged back in later.
df_result0 = df_result.copy()
# Continuous colour scale. Positions are fractions of the plotted data range
# (min -> max), so yellow sits 10% of the way up from the minimum -- not
# necessarily at exactly 0% change.
color_scale = [
    (0, 'red'),       # Low end of the range
    (0.1, 'yellow'),  # 10% of the way up the range
    (1, 'green'),     # High end of the range
]

# Choropleth of membership percent change by state.
# NOTE(review): the title says "2023 to 2024" while the data columns are
# labelled 2024/2025 -- confirm which years the snapshots actually represent.
fig = px.choropleth(
    df_result,
    locations='Name',            # `Name` holds state abbreviations at this point
    locationmode="USA-states",   # Interpret locations as US state codes
    color='Percent Change',
    color_continuous_scale=color_scale,
    scope="usa",                 # Restrict the map to the USA
    title='Percent Change in Total Church Membership By State (2023 to 2024)'
)

# Background colour, font and a source caption.
fig.update_layout(
    paper_bgcolor="#f5f5f5",
    font=dict(family="Cairo"),
    annotations=[
        dict(
            text=" @mormon_metrics<br>data:churchofjesuschrist.org",  # Caption text
            x=1,               # Right edge; with xanchor="right" the caption is right-aligned
            y=-0.15,           # Below the chart
            showarrow=False,
            xanchor="right",
            font=dict(size=12, color="black")
        )
    ]
)

# Show the plot
fig.show()

# Save as png
fig.write_image("../images/6_state_change_1.png")
### Now let's look at change in congregations ###

# One row per `Name`, one column per `Year`, holding the summed `Congregations`.
df_grouped = (
    df_combined
    .groupby(['Name', 'Year'])['Congregations']
    .sum()
    .unstack()
)

# Absolute change in the number of congregations (a difference, not a percentage).
df_grouped['Congregation Change'] = df_grouped[2025] - df_grouped[2024]

# Make `Name` a regular column and swap full names for postal abbreviations.
# Names missing from name_to_abbreviation pass through unchanged.
df_result = df_grouped.reset_index()
df_result['Name'] = df_result['Name'].apply(lambda x: name_to_abbreviation.get(x, x))

# Plotting copy with the change capped to [-30, 30]; `df_result` keeps the
# uncapped values. Presumably the cap stops outliers dominating the colour
# scale.
df_result2 = df_result.copy()
df_result2['Congregation Change'] = df_result2['Congregation Change'].clip(lower=-30, upper=30)
# Diverging colour scale; positions are fractions of the colour range.
color_scale = [
    (0, 'red'),       # Negative change (red)
    (0.5, 'yellow'),  # Little to no change (yellow)
    (1, 'green'),     # Positive change (green)
]

# Choropleth of the (capped) congregation change by state.
# NOTE(review): the title says "2023 to 2024" while the data columns are
# labelled 2024/2025 -- confirm which years the snapshots actually represent.
fig = px.choropleth(
    df_result2,
    locations='Name',            # `Name` holds state abbreviations at this point
    locationmode="USA-states",   # Interpret locations as US state codes
    color='Congregation Change',
    color_continuous_scale=color_scale,
    # Pin the middle of the scale to zero so yellow really means "no change";
    # without this the midpoint is wherever the middle of min/max falls.
    color_continuous_midpoint=0,
    scope="usa",                 # Restrict the map to the USA
    title='Change in Congregations By State (2023 to 2024)'
)

# Background colour, font and a source caption.
fig.update_layout(
    paper_bgcolor="#f5f5f5",
    font=dict(family="Cairo"),
    annotations=[
        dict(
            text=" @mormon_metrics<br>data:churchofjesuschrist.org",  # Caption text
            x=1,               # Right edge; with xanchor="right" the caption is right-aligned
            y=-0.15,           # Below the chart
            showarrow=False,
            xanchor="right",
            font=dict(size=12, color="black")
        )
    ]
)

# Show the plot
fig.show()

# Save as png
fig.write_image("../images/6_state_change_2.png")
# Bucket the uncapped congregation change into three categories. pd.cut bins
# are right-inclusive by default, so:
#   (-inf, -1] -> Decrease,  (-1, 0] -> No Change,  (0, inf) -> Increase
df_result['Category'] = pd.cut(
    df_result['Congregation Change'],
    bins=[-float('inf'), -1, 0, float('inf')],
    labels=['Decrease', 'No Change', 'Increase']
)

# One fixed colour per category.
color_discrete_map = {
    'Decrease': 'red',
    'No Change': 'yellow',
    'Increase': 'green'
}

# Choropleth coloured by category instead of a continuous value.
fig = px.choropleth(
    df_result,
    locations='Name',                        # `Name` holds state abbreviations at this point
    locationmode="USA-states",
    color='Category',                        # Categorical column drives the colour
    color_discrete_map=color_discrete_map,
    scope="usa",
    title='Categorical Change in Congregations By State (2023 to 2024)'
)

# Background colour, font and a source caption.
fig.update_layout(
    paper_bgcolor="#f5f5f5",
    font=dict(family="Cairo"),
    annotations=[
        dict(
            text="@mormon_metrics<br>data:churchofjesuschrist.org",  # Caption text
            x=1,               # Right-align the caption
            y=-0.15,           # Below the chart
            showarrow=False,
            xanchor="right",
            font=dict(size=12, color="black")
        )
    ]
)

# Show the plot
fig.show()

# Save as png
fig.write_image("../images/6_state_change_3.png")
# Give the year columns of the saved membership table descriptive names.
df_result0 = df_result0.rename(columns={2024: 'members_2024', 2025: 'members_2025'})

# Join congregation change with the membership figures (inner join on `Name`)
# and replace missing values with 0.
df_top_n_bot = (
    df_result[['Name', 'Congregation Change']]
    .merge(
        df_result0[['Name', 'Percent Change', 'members_2024', 'members_2025', 'region']],
        on='Name',
    )
    .fillna(0)
)

# Percentile rank (0-100) of each row on both measures, then their average.
# The misspelt column name is kept as-is because it is written to the CSVs.
df_top_n_bot['cong_percentiles'] = df_top_n_bot['Congregation Change'].rank(pct=True) * 100
df_top_n_bot['memb_percentiles'] = df_top_n_bot['Percent Change'].rank(pct=True) * 100
df_top_n_bot['combinded_percentiles'] = (
    df_top_n_bot['cong_percentiles'] + df_top_n_bot['memb_percentiles']
) / 2

# Columns exported for both rankings.
ranking_columns = [
    'Name', 'Congregation Change', 'members_2024', 'members_2025',
    'Percent Change', 'combinded_percentiles',
]

# Top 10 by combined percentile. The rows are states; the output file names
# say "Countries" and are left unchanged.
(
    df_top_n_bot
    .sort_values('combinded_percentiles', ascending=False)[ranking_columns]
    .head(10)
    .to_csv('~/Desktop/Top_10_Countries.csv', index=False)
)

# Bottom 10 by combined percentile.
(
    df_top_n_bot
    .sort_values('combinded_percentiles', ascending=True)[ranking_columns]
    .head(10)
    .to_csv('~/Desktop/Bottom_10_Countries.csv', index=False)
)

# Top 10 by number of members added.
df_top_n_bot['diff'] = df_top_n_bot['members_2025'] - df_top_n_bot['members_2024']
# Share of the net total change, in percent. The denominator sums every row,
# including rows with a negative `diff`.
df_top_n_bot['part_of_total'] = (df_top_n_bot['diff'] / df_top_n_bot['diff'].sum() * 100).round(1)
(
    df_top_n_bot[['Name', 'members_2024', 'members_2025', 'Percent Change', 'diff', 'part_of_total']]
    .sort_values('diff', ascending=False)
    .head(10)
    .to_csv('~/Desktop/Top_10_Countries_by_people_added.csv', index=False)
)
# Total member change by region, plus each region's share of the overall
# change as a fraction. Bound to a name so the table can be inspected; the
# original expression computed it and discarded the result.
region_member_change = (
    df_top_n_bot
    .groupby('region')
    .agg({'diff': 'sum'})
    .reset_index()
    .assign(percent_of_total=lambda x: x['diff'] / x['diff'].sum())
)

# Total members in the 2025 column by region, plus each region's share as a
# fraction.
region_membership = (
    df_top_n_bot
    .groupby('region')
    .agg({'members_2025': 'sum'})
    .reset_index()
    .assign(percent_of_total=lambda x: x['members_2025'] / x['members_2025'].sum())
)
# Treemap of each state's share of the total membership change, restricted to
# rows where membership grew.
fig = px.treemap(
    df_top_n_bot.query('diff > 0'),  # Keep positive changes only
    path=['region', 'Name'],         # Group by region, then state
    values='part_of_total',          # Box size comes from the 'part_of_total' column
    color='region',                  # Colour by region
    color_discrete_map={
        "West": "blue",
        "South": "green",
        "Northeast": "orange",
        "Midwest": "purple"
    },  # Fixed colour per region
    title="Total Membership Change as Proportion by State and Region (2023 to 2024)"
)

# Font, background, margins and a source caption.
fig.update_layout(
    font=dict(family="Cairo", size=14),
    paper_bgcolor="#f5f5f5",
    margin=dict(t=50, l=25, r=25, b=40),
    annotations=[
        dict(
            text="@mormon_metrics<br>data:churchofjesuschrist.org",  # Caption text
            x=1,               # Right-align the caption
            y=-0.1,            # Below the chart
            showarrow=False,
            xanchor="right",
            font=dict(size=12, color="black")
        )
    ]
)

# Label each box with its name and its value (the 'part_of_total' figure).
fig.data[0].textinfo = 'label+value'

# Show the treemap
fig.show()
# Treemap of each state's share of total membership, computed from the
# `members_2025` column as a percentage rounded to one decimal.
fig = px.treemap(
    df_top_n_bot.assign(
        percent_of_total=lambda x: (x['members_2025'] / x['members_2025'].sum() * 100).round(1)
    ),
    path=['region', 'Name'],     # Group by region, then state
    values='percent_of_total',   # Box size comes from the 'percent_of_total' column
    color='region',              # Colour by region
    color_discrete_map={
        "West": "blue",
        "South": "green",
        "Northeast": "orange",
        "Midwest": "purple"
    },  # Fixed colour per region
    title="Total Membership as a Proportion by State and Region in 2024"
)

# Font, background, margins and a source caption.
fig.update_layout(
    font=dict(family="Cairo", size=14),
    paper_bgcolor="#f5f5f5",
    margin=dict(t=50, l=25, r=25, b=40),
    annotations=[
        dict(
            text="@mormon_metrics<br>data:churchofjesuschrist.org",  # Caption text
            x=1,               # Right-align the caption
            y=-0.1,            # Below the chart
            showarrow=False,
            xanchor="right",
            font=dict(size=12, color="black")
        )
    ]
)

# Label each box with its name and its value (the 'percent_of_total' figure).
fig.data[0].textinfo = 'label+value'

# Show the treemap
fig.show()
# Add the CES survey data to show the proportion of self-identified members
# over time.
data = pd.read_stata('../data/CES/CES24_Common.dta')
old_data = pd.read_stata('../data/CES/cumulative_2006-2023.dta')

# Return value is unused here.
# NOTE(review): presumably an exploratory lookup of "state"-like variable
# names; confirm whether st.get_names has side effects before removing it.
st.get_names(data, "state")

# 2024 file: derive the shared columns, then keep only the three states of
# interest.
data['state'] = data.inputstate
data['jellobelt'] = data.state.isin(['Idaho', 'Utah', 'Arizona'])
data['Mormon'] = data['religpew'].isin(['Mormon'])
data['year'] = 2024
data = data.query('jellobelt == True')

# Cumulative file: the same flags, read from its `religion` column; it
# already has `state` and `year` columns.
old_data['jellobelt'] = old_data.state.isin(['Idaho', 'Utah', 'Arizona'])
old_data['Mormon'] = old_data['religion'].isin(['Mormon'])
old_data = old_data.query('jellobelt == True')

# Stack both sources on the three shared columns.
final_data = pd.concat(
    [data[['state', 'Mormon', 'year']],
     old_data[['state', 'Mormon', 'year']]],
    ignore_index=True,
)

# For each state, cross-tabulate `Mormon` by `year`, sort the index in
# descending order, and export the first row to ~/Desktop/<state>.csv.
for state_name in ('Idaho', 'Arizona', 'Utah'):
    (
        st.tabs(final_data.query(f'state == "{state_name}"'), 'Mormon', 'year', display='column')
        .sort_index(ascending=False)
        .head(1)
        .to_csv(f'~/Desktop/{state_name}.csv', index=False)
    )

#gist https://gist.github.com/acbass49/6cda44b7ab4fab8ff58c182b75621084
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment