Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save thorwhalen/8856e280c8116c5043bd7fb4c72da181 to your computer and use it in GitHub Desktop.
Save thorwhalen/8856e280c8116c5043bd7fb4c72da181 to your computer and use it in GitHub Desktop.
Head of state salaries -- extraction and processing
# get raw df
import qo

SALARY_PAGE_URL = 'https://en.wikipedia.org/wiki/List_of_salaries_of_heads_of_state_and_government'

# Keep the item at position 3 of what qo returns for the page, keyed by state.
# NOTE(review): the position is hard-coded; confirm it still points at the
# salary table if the page layout changes.
page_tables = qo.get_tables_from_url(SALARY_PAGE_URL)
df = page_tables[3].set_index('State')

# formatting and extracting salaries
import re

# Single-column frame of head-of-state salaries, minus the rows with no value.
salary_column = ['Head of state annual salary']
d = df[salary_column].dropna()
def extract(x):
    """Return the first ``<number> USD`` fragment found in *x*.

    The number may use commas and dots (e.g. ``"1,234.56 USD"``) and must
    contain at least one digit.

    Args:
        x: A raw table cell. Anything that is not a string (``None``, a
            float NaN, ...) is treated as "no value".

    Returns:
        The matched substring, including the trailing ``" USD"``, or
        ``None`` when *x* is not a string or holds no USD amount.
    """
    # Missing cells are not strings; comparing against NaN with ``!=`` can
    # never filter them out because NaN is unequal to everything.
    if not isinstance(x, str):
        return None
    # The lookahead demands a digit somewhere in the numeric run, so a bare
    # " USD" (as in "1.2 million USD") is not reported as an amount.
    found = re.search(r'(?=[\d,.]*\d)[\d,]*[.\d]* USD', x)
    return found.group(0) if found else None
def to_int(x):
    """Convert an amount string such as ``"1,234.56 USD"`` to whole units.

    The first number in *x* is used. Commas are read as thousands
    separators and a dot starts the fractional part, which is dropped
    (truncated, not rounded).

    Args:
        x: A string containing a number, or a falsy value (``None``,
            ``""``) meaning "no amount".

    Returns:
        The integer part of the first number in *x*, or ``None`` when *x*
        is falsy or contains no digits.
    """
    if not x:
        return None
    # Capture only the integer part; deleting every non-digit would fold
    # the decimals into the value ("1,234.56" -> 123456).
    number = re.search(r'(\d[\d,]*)(?:\.\d+)?', x)
    if number is None:
        return None
    return int(number.group(1).replace(',', ''))
# Parse each raw salary cell: pull out the USD fragment, then convert it to an int.
raw_cells = d['Head of state annual salary'].values
d['salary_usd'] = [to_int(extract(cell)) for cell in raw_cells]

# Drop the rows without a parsed amount and order the rest from highest to lowest.
ranked = d['salary_usd'].dropna()
ranked = ranked.sort_values(ascending=False)

# Render every amount as a plain integer string.
d = ranked.map(lambda amount: str(int(amount)))

# printing a markdown table
print(d.to_markdown())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment