Skip to content

Instantly share code, notes, and snippets.

@kylrth
Created December 19, 2022 17:47
Show Gist options
  • Save kylrth/b75dbf2dbc410bb7b636baa8938e660e to your computer and use it in GitHub Desktop.
Save kylrth/b75dbf2dbc410bb7b636baa8938e660e to your computer and use it in GitHub Desktop.
some population and immigration data sleuthing
import pandas as pd
def get_historical_rates(country: str, source="ourworldindata.csv") -> pd.DataFrame:
    """Load the yearly population growth rates of a single country.

    Data export from
    https://ourworldindata.org/grapher/population-growth-rate-with-and-without-migration

    Args:
        country: Name matched exactly against the CSV's "Entity" column.
        source: Location of the CSV export, forwarded unchanged to
            ``pandas.read_csv``. Defaults to "ourworldindata.csv", resolved
            against the current working directory.

    Returns:
        A frame indexed by "year" in which the two growth-rate columns are
        renamed to "total_rate" and "natural_rate". Columns other than
        "Entity" and "Code" are kept. The frame is empty when no row
        matches ``country``.
    """
    df = pd.read_csv(source)
    df = df[df["Entity"] == country]
    # The entity is fixed by the filter above, so its name and code carry no
    # further information.
    df = df.drop(["Entity", "Code"], axis=1)
    df = df.rename(
        columns={
            "Year": "year",
            "Growth rate - Sex: all - Age: all - Variant: estimates": "total_rate",
            "Natural growth rate - Sex: all - Age: all - Variant: estimates": "natural_rate",
        }
    )
    return df.set_index("year")
def idx_and_value(s: pd.Series, i: int) -> tuple:
    """Return the index label and the value found at integer position ``i``.

    Args:
        s: Series to read from.
        i: Zero-based position (not a label); negative values count from
            the end, as with ordinary sequence indexing.

    Returns:
        A ``(label, value)`` pair for that position.
    """
    return s.index[i], s.iloc[i]
def main():
    """Print population growth and net-migration statistics to stdout.

    Reads "ourworldindata.csv" (through ``get_historical_rates``),
    "world_pop.csv" and "world_nat_pop.csv" from the working directory.
    """
    historical = {
        "US": get_historical_rates("United States"),
        "India": get_historical_rates("India"),
        "Malaysia": get_historical_rates("Malaysia"),
    }

    # https://en.wikipedia.org/wiki/List_of_countries_by_population_growth_rate
    totals = pd.read_csv("world_pop.csv", index_col="country")
    # Keep only the rows whose CIA figure is dated 2021. A single .loc call
    # replaces the chained df[col][mask] lookup.
    world_ch = totals.loc[totals["cia_year"] == 2021, "cia_percent"]

    # https://en.wikipedia.org/wiki/List_of_sovereign_states_by_natural_increase
    natural = pd.read_csv("world_nat_pop.csv", index_col="country")
    # NOTE(review): presumably the source is per 1,000 people, so dividing by
    # 10 yields a percentage comparable with total_rate -- confirm.
    world_nat_ch = (natural["natural_increase_rate"] / 10).rename("natural_rate")

    # Inner join: only countries present in both tables survive.
    world = pd.DataFrame({"total_rate": world_ch}).join(world_nat_ch, how="inner")
    # Growth that natural increase does not account for is attributed to
    # net migration.
    world["immigration"] = world["total_rate"] - world["natural_rate"]

    # print out interesting stats
    for country, rates in historical.items():
        s = rates["natural_rate"]
        year, rate = idx_and_value(s, s.argmax())
        print(f"highest natural growth in {country}: {rate}%, in {year}")
        year, rate = idx_and_value(s, s.argmin())
        print(f"lowest natural growth in {country}: {rate}%, in {year}")

    print("\ncurrent stats for select countries, sorted by immigration rate:")
    selected = [
        "Australia",
        "Canada",
        "France",
        "Germany",
        "Greece",
        "India",
        "Italy",
        "Japan",
        "New Zealand",
        "South Korea",
        "Sweden",
        "United Kingdom",
        "United States",
    ]
    # .loc with a list of labels raises KeyError if any label is absent, and
    # the year filter plus the inner join can drop countries. Report the
    # missing ones instead of aborting the whole report.
    missing = [name for name in selected if name not in world.index]
    if missing:
        print(f"(no data for: {', '.join(missing)})")
    present = [name for name in selected if name in world.index]
    print(world.loc[present].sort_values("immigration"))

    print("\ncountries experiencing the highest rates of attrition:")
    # Ascending order puts the most negative net-migration rates first.
    print(world.sort_values(by="immigration", ascending=True).head(10))

    print("\ncountries experiencing the highest natural growth:")
    print(world.sort_values(by="natural_rate", ascending=False).head(10))
if __name__ == "__main__":
    # Allow up to 500 rows to be displayed before pandas truncates a printed
    # frame or series.
    pd.set_option("display.max_rows", 500)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment