Skip to content

Instantly share code, notes, and snippets.

Created December 19, 2022 17:47
Show Gist options
  • Save kylrth/b75dbf2dbc410bb7b636baa8938e660e to your computer and use it in GitHub Desktop.
Save kylrth/b75dbf2dbc410bb7b636baa8938e660e to your computer and use it in GitHub Desktop.
some population and immigration data sleuthing
import pandas as pd
def get_historical_rates(
    country: str, path: str = "ourworldindata.csv"
) -> pd.DataFrame:
    """Load the historical growth rates of a single country.

    Reads the CSV at *path*, keeps only the rows whose ``Entity`` column
    equals *country*, drops the ``Entity`` and ``Code`` columns, shortens the
    verbose column names and indexes the result by year.

    Args:
        country: Value to match exactly against the ``Entity`` column.
        path: CSV file to read. Defaults to ``"ourworldindata.csv"`` in the
            current working directory.

    Returns:
        A frame indexed by ``year`` whose renamed columns are ``total_rate``
        and ``natural_rate``. It is empty when no row matches *country*.
    """
    df = pd.read_csv(path)
    df = df[df["Entity"] == country]
    df = df.drop(["Entity", "Code"], axis=1)
    # The source headers are long descriptive strings; map them to short
    # names so the rest of the script can refer to them conveniently.
    df = df.rename(
        columns={
            "Year": "year",
            "Growth rate - Sex: all - Age: all - Variant: estimates": "total_rate",
            "Natural growth rate - Sex: all - Age: all - Variant: estimates": "natural_rate",
        }
    )
    df = df.set_index("year")
    return df
def idx_and_value(s: pd.Series, i: int) -> tuple:
    """Return the index label and the value found at position *i* of *s*.

    Args:
        s: Series to read from.
        i: Integer position (not a label); negative values count from the end.

    Returns:
        A ``(label, value)`` tuple taken from ``s.index[i]`` and ``s.iloc[i]``.
    """
    return s.index[i], s.iloc[i]
def main():
    """Print population growth and immigration statistics.

    Loads historical rates for a few countries through
    ``get_historical_rates``, builds a per-country table from
    ``world_pop.csv`` and ``world_nat_pop.csv`` with the columns
    ``total_rate``, ``natural_rate`` and ``immigration`` (total minus
    natural), and prints several views of that data to stdout.
    """
    historical = {
        "US": get_historical_rates("United States"),
        "India": get_historical_rates("India"),
        "Malaysia": get_historical_rates("Malaysia"),
    }

    # Total growth: keep only the rows whose figure is reported for 2021.
    df = pd.read_csv("world_pop.csv", index_col="country")
    world_ch = df.loc[df["cia_year"] == 2021, "cia_percent"]

    # Natural growth. The division by 10 presumably converts a per-1000 rate
    # into a percentage so it is comparable with total_rate -- TODO confirm.
    # The series must be named "natural_rate" because join() uses the name as
    # the column label that is read back below.
    df = pd.read_csv("world_nat_pop.csv", index_col="country")
    world_nat_ch = (df["natural_increase_rate"] / 10).rename("natural_rate")

    # The inner join keeps only countries present in both data sets.
    world = pd.DataFrame({"total_rate": world_ch}).join(world_nat_ch, how="inner")
    world["immigration"] = world["total_rate"] - world["natural_rate"]

    # print out interesting stats
    for country, rates in historical.items():
        s = rates["natural_rate"]
        i = s.argmax()
        print(f"highest natural growth in {country}: {s.iloc[i]}%, in {s.index[i]}")
        i = s.argmin()
        print(f"lowest natural growth in {country}: {s.iloc[i]}%, in {s.index[i]}")

    print("\ncurrent stats for select countries, sorted by immigration rate:")
    # NOTE(review): only these four names survive in the source; the original
    # selection may have contained more entries -- confirm and extend.
    select_countries = [
        "New Zealand",
        "South Korea",
        "United Kingdom",
        "United States",
    ]
    # isin() rather than .loc[list]: a country dropped by the inner join above
    # is then simply absent instead of raising KeyError.
    # NOTE(review): descending order is an assumption -- confirm.
    selected = world[world.index.isin(select_countries)]
    print(selected.sort_values(by="immigration", ascending=False))

    print("\ncountries experiencing the highest rates of attrition:")
    print(world.sort_values(by="immigration", ascending=True).head(10))

    print("\ncountries experiencing the highest natural growth:")
    print(world.sort_values(by="natural_rate", ascending=False).head(10))
if __name__ == "__main__":
    # Raise the display limit to 500 rows for the tables printed by main().
    pd.set_option("display.max_rows", 500)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment