Skip to content

Instantly share code, notes, and snippets.

@GloriaVictis
Last active November 22, 2022 07:58
Embed
What would you like to do?
import urllib.parse
import urllib.request
import re
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
page = "List_of_chess_grandmasters"
def wiki_to_json_dict(title):
safe_title = urllib.parse.quote(title)
response = urllib.request.urlopen('http://en.wikipedia.org/w/api.php?action=query&titles='+safe_title+'&prop=revisions&rvprop=content&format=json')
markup = response.read()
output_file = open('data.json', 'wb')
output_file.write(markup)
output_file.close()
json_data = open('data.json', 'r')
data = json.load(json_data)
json_data.close()
return data
def extract_text_from_article(structure):
pageid = list(structure['query']['pages'].keys())[0]
return (structure['query']['pages'][pageid]['revisions'])[0]['*']
grandmasters = extract_text_from_article(wiki_to_json_dict(page))
regex = ".*?(\d{4})-\d{2}-\d{2}.*?\|\|(\d{4})\|\|\{\{(.*?)\}\}"
dates = re.findall(regex, grandmasters)
born_after = 1945
ages = [int(date[1]) - int(date[0]) for date in dates if int(date[1]) - int(date[0]) > 5 and int(date[0]) > born_after]
age_counts = Counter(ages)
age_average = np.mean(ages)
countries = Counter([date[2] for date in dates])
print(age_average)
print(countries)
labels, values = zip(*age_counts.items())
indexes = np.arange(len(labels))
width = 1
plt.bar(indexes, values, width)
plt.xlabel("Reached GM at age")
plt.ylabel("Number of players")
plt.xticks(indexes, labels, rotation='vertical', size=8)
plt.yticks(np.arange(0, 121, 10))
plt.savefig("ages.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment