Skip to content

Instantly share code, notes, and snippets.

@jurand71
Created January 27, 2023 16:29
Show Gist options
  • Save jurand71/be30627568bd4f774fb760243d6aa3da to your computer and use it in GitHub Desktop.
Save jurand71/be30627568bd4f774fb760243d6aa3da to your computer and use it in GitHub Desktop.
import re
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Build a wide CSV of Global Peace Index ranks/scores per country and year,
# scraped from Wikipedia and enriched with country metadata from a local
# 'countries.csv'.

# Matches numeric footnote markers such as "[4]" that appear in table headers.
footnote = re.compile(r'\[\d*\]')

# Fetch the article. The timeout keeps the script from hanging indefinitely,
# and raise_for_status() stops on an HTTP error instead of parsing an error page.
pageurl = requests.get("https://en.wikipedia.org/wiki/Global_Peace_Index", timeout=30)
pageurl.raise_for_status()
soup = BeautifulSoup(pageurl.content, 'html.parser')

# Tables are selected by their position on the page.
# NOTE(review): index 1 is treated as the 2022 ranking and index 2 as the
# multi-year table; confirm against the live page if its layout changes.
tables = soup.find_all('table')

gpi2022table = tables[1]
# read_html returns a list of DataFrames; StringIO avoids the deprecated
# "literal HTML string" input form.
df_2022 = pd.read_html(StringIO(str(gpi2022table)))[0]
# Rename the rank/score headers regardless of which footnote number (if any)
# is attached to them, e.g. "Score[4]" or "Score[7]" both become "2022 score".
year_2022_names = {"Rank": "2022 rank", "Score": "2022 score"}
df_2022 = df_2022.rename(
    columns=lambda name: year_2022_names.get(footnote.sub('', name), name)
)

gpitable = tables[2]
df_all = pd.read_html(StringIO(str(gpitable)))[0]

# Left join keeps every row of the 2022 table.
gpi_df = pd.merge(df_2022, df_all, how="left", on='Country')

# Drop the remaining footnote markers from the merged column names.
columns_names = gpi_df.columns.tolist()
columns_names = [footnote.sub('', w) for w in columns_names]
gpi_df.columns = columns_names

# Remove '=' signs, em dashes and single-letter bracketed markers ("[a]")
# from the cell values.
gpi_df = gpi_df.replace({'=': '', '—': '', r'\[[a-z]\]': ''}, regex=True)

# Country metadata, renamed so it can be joined on 'Country'.
df = pd.read_csv('countries.csv', usecols=['ISO_CODE', 'COUNTRY_NAME', 'Geographic Regions', 'EU Members'])
df.columns = ['ISO_Code', 'Country', 'Geographic Regions', 'EU Members']
gpi_wide = pd.merge(gpi_df, df, on='Country', how='left')

# Output order: identifying columns first, then "<year> rank", "<year> score"
# for every year from 2022 down to 2007.
year_columns = [
    f"{year} {kind}"
    for year in range(2022, 2006, -1)
    for kind in ("rank", "score")
]
gpi_wide = gpi_wide[['Country', 'ISO_Code', 'Geographic Regions', 'EU Members'] + year_columns]
gpi_wide.to_csv("gpi_all_data_wide.csv", encoding="utf-8")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment