alexandrehuat/population_vs_temperature.py

## population_vs_temperature.py
"""
Purpose: To study how world population influences global temperature.

Results: The standardised temperature anomaly closely fits the standardised population anomaly.
Standardised greenhouse gas emissions (GHGE) do not fit the standardised temperature anomaly.
Univariate linear regressions on population growth and on GHGE each fit temperature well.

Conclusion: Population growth seems to predict global temperature anomaly, at least since 1850.
Humans seem to be sources of temperature increase by themselves.
A lack of GHGE data and the absence of a direct fit (from standardized data) prevent us from being more affirmative regarding the link between GHGE and temperature.

Data sources:
* Population: https://ourworldindata.org/grapher/population
* Greenhouse gas emissions: https://github.com/owid/co2-data
* Temperature: https://ourworldindata.org/grapher/temperature-anomaly
"""

import pandas as pd
import seaborn as sns
# Apply the seaborn "whitegrid" style to every plot created by this module.
sns.set_style("whitegrid")
import matplotlib.pyplot as plt
# Default resolution (dots per inch) for all figures.
plt.rcParams["figure.dpi"] = 300


def load_data(fp, column, rename=None):
    """Load the world-level yearly series of one column from a CSV file.

    The file must contain a region column named "Entity" or "country" and a
    year column named "Year" or "year". Only the rows whose region is
    "World" or "Global" are kept.

    Parameters
    ----------
    fp : str, path object or file-like object
        Passed unchanged to ``pandas.read_csv``.
    column : str
        Name of the column holding the values to extract.
    rename : str, optional
        Name given to the returned series; defaults to ``column``.

    Returns
    -------
    pandas.Series
        The values of ``column`` indexed by year.

    Raises
    ------
    KeyError
        If the region column, the year column or ``column`` is missing.
    """
    data = pd.read_csv(fp)

    # Resolve the region column explicitly rather than relying on whatever
    # value a loop variable holds once the loop is over.
    region_candidates = ("Entity", "country")
    region_col = next((c for c in region_candidates if c in data.columns), None)
    if region_col is None:
        raise KeyError(f"no region column found; expected one of {region_candidates}")

    # Both spellings of the year column are accepted.
    year_candidates = ("Year", "year")
    year_col = next((c for c in year_candidates if c in data.columns), None)
    if year_col is None:
        raise KeyError(f"no year column found; expected one of {year_candidates}")

    world = data.loc[data[region_col].isin(["World", "Global"])]
    name = column if rename is None else rename
    return pd.Series(world[column].values, world[year_col].values, name=name)

def savefig(ax, fp, **kws):
    """Tighten the layout of the figure that owns ``ax`` and save it to ``fp``.

    Any extra keyword arguments are forwarded to the figure's ``savefig``.
    """
    figure = ax.get_figure()
    figure.tight_layout()
    figure.savefig(fp, **kws)


if __name__ == "__main__":
    # Load the three world-level series and align them by year, one column
    # per series. The temperature anomaly is deliberately the last column.
    data = pd.concat([
        load_data("population.csv", "Population (historical estimates)", "Population"),
        load_data("owid-co2-data.csv", "total_ghg", "Green house gaz emissions (Mt of CO2-equivalents)"),
        load_data("temperature-anomaly.csv", "Global average temperature anomaly relative to 1961-1990", "Temperature anomaly (°C)"),
    ], axis=1)
    # Keep only the years for which the temperature anomaly is known.
    data = data.loc[data.iloc[:, -1].notna()]

    # Standardize each column (zero mean, unit standard deviation) so the
    # three series can share one axis.
    norm_data = (data - data.mean(axis=0)) / data.std(axis=0)

    # Plot data by year
    ax = sns.lineplot(norm_data)
    ax.set_title("Source: Our World in Data")
    ax.set_xlabel("Year")
    ax.set_ylabel("Standardised scale")
    savefig(ax, "lineplot.png")

    # Linear regression of the temperature anomaly on each other variable
    target = data.columns[-1]
    for i, predictor in enumerate(data.columns[:-1]):
        joint_grid = sns.jointplot(data, x=predictor, y=target, kind="reg")
        # Use the public ``figure`` attribute instead of the private ``_figure``.
        joint_grid.figure.savefig(f"jointplot_{i}.png")
        # Start a fresh figure so the residuals are not drawn on the joint plot.
        plt.figure()
        ax = sns.residplot(data, x=predictor, y=target)
        ax.set_title("Linear regression residuals")
        savefig(ax, f"linreg_residuals_{i}.png")
	"""
	Purpose: To study how world population influences global temperature.

	Results: The standardised temperature anomaly closely fits the standardised population anomaly.
	Standardised greenhouse gas emissions (GHGE) do not fit the standardised temperature anomaly.
	Univariate linear regressions on population growth and on GHGE each fit temperature well.

	Conclusion: Population growth seems to predict global temperature anomaly, at least since 1850.
	Humans seem to be sources of temperature increase by themselves.
	A lack of GHGE data and the absence of a direct fit (from standardized data) prevent us from being more affirmative regarding the link between GHGE and temperature.

	Data sources:
	* Population: https://ourworldindata.org/grapher/population
	* Greenhouse gas emissions: https://github.com/owid/co2-data
	* Temperature: https://ourworldindata.org/grapher/temperature-anomaly
	"""

	import pandas as pd
	import seaborn as sns
	# Apply the seaborn "whitegrid" style to every plot created by this module.
	sns.set_style("whitegrid")
	import matplotlib.pyplot as plt
	# Default resolution (dots per inch) for all figures.
	plt.rcParams["figure.dpi"] = 300


	def load_data(fp, column, rename=None):
	    """Load the world-level yearly series of one column from a CSV file.

	    The file must contain a region column named "Entity" or "country" and a
	    year column named "Year" or "year". Only the rows whose region is
	    "World" or "Global" are kept.

	    Parameters
	    ----------
	    fp : str, path object or file-like object
	        Passed unchanged to ``pandas.read_csv``.
	    column : str
	        Name of the column holding the values to extract.
	    rename : str, optional
	        Name given to the returned series; defaults to ``column``.

	    Returns
	    -------
	    pandas.Series
	        The values of ``column`` indexed by year.

	    Raises
	    ------
	    KeyError
	        If the region column, the year column or ``column`` is missing.
	    """
	    data = pd.read_csv(fp)

	    # Resolve the region column explicitly rather than relying on whatever
	    # value a loop variable holds once the loop is over.
	    region_candidates = ("Entity", "country")
	    region_col = next((c for c in region_candidates if c in data.columns), None)
	    if region_col is None:
	        raise KeyError(f"no region column found; expected one of {region_candidates}")

	    # Both spellings of the year column are accepted.
	    year_candidates = ("Year", "year")
	    year_col = next((c for c in year_candidates if c in data.columns), None)
	    if year_col is None:
	        raise KeyError(f"no year column found; expected one of {year_candidates}")

	    # The two region labels are combined with a logical OR.
	    world = data.loc[data[region_col].isin(["World", "Global"])]
	    name = column if rename is None else rename
	    return pd.Series(world[column].values, world[year_col].values, name=name)

	def savefig(ax, fp, **kws):
	    """Tighten the layout of the figure that owns ``ax`` and save it to ``fp``.

	    Any extra keyword arguments are forwarded to the figure's ``savefig``.
	    """
	    fig = ax.get_figure()
	    fig.tight_layout()
	    fig.savefig(fp, **kws)


	if __name__ == "__main__":
	    # Load the three world-level series and align them by year, one column
	    # per series. The temperature anomaly is deliberately the last column.
	    data = pd.concat([
	        load_data("population.csv", "Population (historical estimates)", "Population"),
	        load_data("owid-co2-data.csv", "total_ghg", "Green house gaz emissions (Mt of CO2-equivalents)"),
	        load_data("temperature-anomaly.csv", "Global average temperature anomaly relative to 1961-1990", "Temperature anomaly (°C)"),
	    ], axis=1)
	    # Keep only the years for which the temperature anomaly is known.
	    data = data.loc[data.iloc[:, -1].notna()]

	    # Standardize each column (zero mean, unit standard deviation) so the
	    # three series can share one axis.
	    norm_data = (data - data.mean(axis=0)) / data.std(axis=0)

	    # Plot data by year
	    ax = sns.lineplot(norm_data)
	    ax.set_title("Source: Our World in Data")
	    ax.set_xlabel("Year")
	    ax.set_ylabel("Standardised scale")
	    savefig(ax, "lineplot.png")

	    # Linear regression of the temperature anomaly on each other variable
	    target = data.columns[-1]
	    for i, predictor in enumerate(data.columns[:-1]):
	        joint_grid = sns.jointplot(data, x=predictor, y=target, kind="reg")
	        # Use the public ``figure`` attribute instead of the private ``_figure``.
	        joint_grid.figure.savefig(f"jointplot_{i}.png")
	        # Start a fresh figure so the residuals are not drawn on the joint plot.
	        plt.figure()
	        ax = sns.residplot(data, x=predictor, y=target)
	        ax.set_title("Linear regression residuals")
	        savefig(ax, f"linreg_residuals_{i}.png")