-
-
Save jurand71/be30627568bd4f774fb760243d6aa3da to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape the Global Peace Index (GPI) tables from the Wikipedia article, join
# them with a local country lookup ('countries.csv') and write a single wide
# CSV holding one "<year> rank" / "<year> score" column pair per year.
#
# Inputs : the Wikipedia page below (network) and 'countries.csv' in the
#          current working directory.
# Output : 'gpi_all_data_wide.csv' in the current working directory.
import re
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

GPI_URL = "https://en.wikipedia.org/wiki/Global_Peace_Index"
FIRST_YEAR, LAST_YEAR = 2007, 2022

# Wikipedia footnote markers such as "[4]" that end up inside column headers.
FOOTNOTE_RE = re.compile(r'\[\d*\]')

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were the
# article.
pageurl = requests.get(GPI_URL, timeout=30)
pageurl.raise_for_status()
soup = BeautifulSoup(pageurl.content, 'html.parser')

# Tables are picked by position on the page, so a layout change on Wikipedia
# will require adjusting these indices.
tables = soup.find_all('table')

# NOTE(review): tables[1] is presumably the 2022 ranking table — confirm
# against the live page.
gpi2022table = tables[1]
# read_html() wants a file-like object; passing literal HTML is deprecated.
df_2022 = pd.read_html(StringIO(str(gpi2022table)))[0]
# Drop footnote markers before renaming so the rename does not depend on the
# footnote number currently shown in the header (e.g. "Score[4]").
df_2022.columns = [FOOTNOTE_RE.sub('', name) for name in df_2022.columns]
df_2022 = df_2022.rename(columns={"Rank": "2022 rank", "Score": "2022 score"})

# NOTE(review): tables[2] is presumably the multi-year history table — confirm.
gpitable = tables[2]
df_all = pd.read_html(StringIO(str(gpitable)))[0]

# Left join keeps every country present in the first table.
gpi_df = pd.merge(df_2022, df_all, how="left", on='Country')
gpi_df.columns = [FOOTNOTE_RE.sub('', name) for name in gpi_df.columns]
# Remove "=", em dashes and lettered note markers such as "[a]" from string
# cell values.
gpi_df = gpi_df.replace({'=': '', '—': '', r'\[[a-z]\]': ''}, regex=True)

df = pd.read_csv(
    'countries.csv',
    usecols=['ISO_CODE', 'COUNTRY_NAME', 'Geographic Regions', 'EU Members'],
)
# Rename by name rather than by position: `usecols` does not control column
# order, so a positional rename would mislabel columns if the CSV stores them
# in a different order.
df = df.rename(columns={'ISO_CODE': 'ISO_Code', 'COUNTRY_NAME': 'Country'})
gpi_wide = pd.merge(gpi_df, df, on='Country', how='left')

# Final layout: identifying columns first, then rank/score pairs from the most
# recent year down to the earliest.
year_columns = [
    f'{year} {measure}'
    for year in range(LAST_YEAR, FIRST_YEAR - 1, -1)
    for measure in ('rank', 'score')
]
gpi_wide = gpi_wide[
    ['Country', 'ISO_Code', 'Geographic Regions', 'EU Members'] + year_columns
]
# The DataFrame index is written as the first, unnamed CSV column.
gpi_wide.to_csv("gpi_all_data_wide.csv", encoding="utf-8")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment