kylrth/run.py

## run.py
import pandas as pd


def get_historical_rates(
    country: str, csv_path: str = "ourworldindata.csv"
) -> pd.DataFrame:
    """Load the historical growth-rate table for a single country.

    Data source:
    https://ourworldindata.org/grapher/population-growth-rate-with-and-without-migration

    Args:
        country: Value matched exactly against the CSV's ``Entity`` column.
        csv_path: Location of the CSV export. Defaults to
            ``ourworldindata.csv`` relative to the current working directory.

    Returns:
        A DataFrame indexed by ``year`` with the ``Entity`` and ``Code``
        columns removed and the two long rate columns renamed to
        ``total_rate`` and ``natural_rate``. The frame is empty when no row
        matches ``country``.
    """
    # Short names for the verbose column headers used in the export.
    column_names = {
        "Year": "year",
        "Growth rate - Sex: all - Age: all - Variant: estimates": "total_rate",
        "Natural growth rate - Sex: all - Age: all - Variant: estimates": "natural_rate",
    }

    raw = pd.read_csv(csv_path)
    rows = raw[raw["Entity"] == country]
    return (
        rows.drop(columns=["Entity", "Code"])
        .rename(columns=column_names)
        .set_index("year")
    )


def idx_and_value(s: pd.Series, i: int):
    """Return the index label and the value found at position ``i`` of ``s``.

    ``i`` is positional (it is passed to ``Series.iloc``), so negative values
    count from the end. The result is a ``(label, value)`` tuple.
    """
    label = s.index[i]
    value = s.iloc[i]
    return label, value


def main():
    """Load the population datasets and print a few summary statistics.

    Reads per-country historical rates through ``get_historical_rates`` and
    the current world tables from ``world_pop.csv`` and ``world_nat_pop.csv``
    in the working directory, then prints:

    * the highest and lowest natural growth rate (and its year) for each
      historical country,
    * current rates for a fixed selection of countries,
    * the ten countries with the lowest ``immigration`` value, and
    * the ten countries with the highest ``natural_rate``.
    """
    # Output label -> name of the entity to load, in print order.
    sources = (
        ("US", "United States"),
        ("India", "India"),
        ("Malaysia", "Malaysia"),
    )
    historical = {label: get_historical_rates(entity) for label, entity in sources}

    # https://en.wikipedia.org/wiki/List_of_countries_by_population_growth_rate
    # https://en.wikipedia.org/wiki/List_of_sovereign_states_by_natural_increase
    growth = pd.read_csv("world_pop.csv", index_col="country")
    # Only rows whose CIA figure is dated 2021 are used.
    world_ch = growth.loc[growth["cia_year"] == 2021, "cia_percent"]

    natural = pd.read_csv("world_nat_pop.csv", index_col="country")
    # Scaled by 1/10 before being subtracted from cia_percent below
    # (presumably per-1,000 -> percent; confirm against the CSV).
    world_nat_ch = (natural["natural_increase_rate"] / 10).rename("natural_rate")

    # Inner join: keep only countries present in both tables.
    world = pd.DataFrame({"total_rate": world_ch}).join(world_nat_ch, how="inner")
    # Growth that is not natural increase is reported as immigration.
    world["immigration"] = world["total_rate"] - world["natural_rate"]

    # print out interesting stats

    for country, rates in historical.items():
        s = rates["natural_rate"]

        # argmax/argmin give positions, so both value and year come from one lookup.
        i = s.argmax()
        print(f"highest natural growth in {country}: {s.iloc[i]}%, in {s.index[i]}")

        i = s.argmin()
        print(f"lowest natural growth in {country}: {s.iloc[i]}%, in {s.index[i]}")

    selected = [
        "Australia",
        "Canada",
        "France",
        "Germany",
        "Greece",
        "India",
        "Italy",
        "Japan",
        "New Zealand",
        "South Korea",
        "Sweden",
        "United Kingdom",
        "United States",
    ]
    print("\ncurrent stats for select countries, sorted by immigration rate:")
    print(world.loc[selected].sort_values("immigration"))

    # Ascending sort: the smallest immigration values come first.
    print("\ncountries experiencing the highest rates of attrition:")
    print(world.sort_values("immigration").head(10))

    print("\ncountries experiencing the highest natural growth:")
    print(world.sort_values("natural_rate", ascending=False).head(10))


if __name__ == "__main__":
    # Let pandas print up to 500 rows before it switches to a truncated view.
    pd.options.display.max_rows = 500
    main()
	import pandas as pd


	def get_historical_rates(
	    country: str, csv_path: str = "ourworldindata.csv"
	) -> pd.DataFrame:
	    """Load the historical growth-rate table for a single country.

	    Data source:
	    https://ourworldindata.org/grapher/population-growth-rate-with-and-without-migration

	    Args:
	        country: Value matched exactly against the CSV's ``Entity`` column.
	        csv_path: Location of the CSV export. Defaults to
	            ``ourworldindata.csv`` relative to the current working directory.

	    Returns:
	        A DataFrame indexed by ``year`` with the ``Entity`` and ``Code``
	        columns removed and the two long rate columns renamed to
	        ``total_rate`` and ``natural_rate``. The frame is empty when no row
	        matches ``country``.
	    """
	    df = pd.read_csv(csv_path)
	    df = df[df["Entity"] == country]
	    df = df.drop(["Entity", "Code"], axis=1)
	    # Replace the verbose export headers with short column names.
	    df = df.rename(
	        columns={
	            "Year": "year",
	            "Growth rate - Sex: all - Age: all - Variant: estimates": "total_rate",
	            "Natural growth rate - Sex: all - Age: all - Variant: estimates": "natural_rate",
	        }
	    )
	    df = df.set_index("year")

	    return df


	def idx_and_value(s: pd.Series, i: int):
	    """Return ``(label, value)`` for the element at position ``i`` of ``s``.

	    ``i`` is positional (it is passed to ``Series.iloc``), so negative
	    values count from the end.
	    """
	    return s.index[i], s.iloc[i]


	def main():
	    """Load the population datasets and print a few summary statistics.

	    Reads per-country historical rates through ``get_historical_rates`` and
	    the current world tables from ``world_pop.csv`` and
	    ``world_nat_pop.csv`` in the working directory, then prints the extreme
	    natural growth rates per historical country, current rates for a fixed
	    selection of countries, and two top-ten rankings.
	    """
	    historical = {
	        "US": get_historical_rates("United States"),
	        "India": get_historical_rates("India"),
	        "Malaysia": get_historical_rates("Malaysia"),
	    }

	    # https://en.wikipedia.org/wiki/List_of_countries_by_population_growth_rate
	    # https://en.wikipedia.org/wiki/List_of_sovereign_states_by_natural_increase
	    df = pd.read_csv("world_pop.csv", index_col="country")
	    # Only rows whose CIA figure is dated 2021 are used.
	    world_ch = df["cia_percent"][df["cia_year"] == 2021]
	    df = pd.read_csv("world_nat_pop.csv", index_col="country")
	    # Scaled by 1/10 before being subtracted from cia_percent below
	    # (presumably per-1,000 -> percent; confirm against the CSV).
	    world_nat_ch = df["natural_increase_rate"] / 10
	    world_nat_ch.name = "natural_rate"
	    # Inner join: keep only countries present in both tables.
	    df = pd.DataFrame({"total_rate": world_ch}).join(world_nat_ch, how="inner")
	    # Growth that is not natural increase is reported as immigration.
	    df["immigration"] = df["total_rate"] - df["natural_rate"]
	    world = df

	    # print out interesting stats

	    for country in historical:
	        s = historical[country]["natural_rate"]
	        i = s.argmax()
	        print(f"highest natural growth in {country}: {s.iloc[i]}%, in {s.index[i]}")

	        i = s.argmin()
	        print(f"lowest natural growth in {country}: {s.iloc[i]}%, in {s.index[i]}")

	    print("\ncurrent stats for select countries, sorted by immigration rate:")
	    print(
	        world.loc[
	            [
	                "Australia",
	                "Canada",
	                "France",
	                "Germany",
	                "Greece",
	                "India",
	                "Italy",
	                "Japan",
	                "New Zealand",
	                "South Korea",
	                "Sweden",
	                "United Kingdom",
	                "United States",
	            ]
	        ].sort_values("immigration")
	    )

	    # Ascending sort: the smallest immigration values come first.
	    print("\ncountries experiencing the highest rates of attrition:")
	    print(world.sort_values(by="immigration", ascending=True).head(10))

	    print("\ncountries experiencing the highest natural growth:")
	    print(world.sort_values(by="natural_rate", ascending=False).head(10))


	if __name__ == "__main__":
	    # Let pandas print up to 500 rows before it switches to a truncated view.
	    pd.set_option("display.max_rows", 500)
	    main()