# Skip to content

# Instantly share code, notes, and snippets.

# Last active November 22, 2022 07:58
# What would you like to do?
import urllib.parse
import urllib.request
import re
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
# Title of the article to analyse; it is passed to wiki_to_json_dict() further down.
page = "List_of_chess_grandmasters"
def wiki_to_json_dict(title):
    """Fetch an article's revision content from the MediaWiki API as a dict.

    The raw JSON response is also written to ``data.json`` in the current
    working directory.

    Args:
        title: Article title as used in the page URL, for example
            ``"List_of_chess_grandmasters"``. It is percent-encoded before
            being placed in the query string.

    Returns:
        The decoded JSON response (a ``dict``).

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    safe_title = urllib.parse.quote(title)
    # NOTE(review): the endpoint below is the English Wikipedia action API,
    # reconstructed from the query parameters and the response shape the
    # caller reads -- confirm it is the intended host.
    url = (
        'https://en.wikipedia.org/w/api.php?action=query&titles='
        + safe_title
        + '&prop=revisions&rvprop=content&format=json'
    )
    with urllib.request.urlopen(url) as response:
        markup = response.read()

    # Keep a copy of the response on disk; the context manager guarantees
    # the handle is flushed and closed.
    with open('data.json', 'wb') as output_file:
        output_file.write(markup)

    # Parse the bytes directly so decoding does not depend on the locale's
    # default text encoding.
    return json.loads(markup)
def extract_text_from_article(structure):
    """Return the content of the first revision of the first page in a response.

    Args:
        structure: Decoded API response shaped like
            ``{'query': {'pages': {<pageid>: {'revisions': [{'*': text}]}}}}``.

    Returns:
        The value stored under ``'*'`` in the first revision of the first
        page listed in ``structure['query']['pages']``.

    Raises:
        KeyError: If an expected key is missing from the response.
        IndexError: If the response lists no pages or the page has no
            revisions.
    """
    pages = structure['query']['pages']
    if not pages:
        raise IndexError("response contains no pages")
    # The page id is not known in advance, so take whichever page comes first.
    first_page = next(iter(pages.values()))
    return first_page['revisions'][0]['*']
# Download the article and pull out its markup text.
grandmasters = extract_text_from_article(wiki_to_json_dict(page))

# Each match is a 3-tuple: the year of a yyyy-mm-dd date (used as the birth
# year), a 4-digit year between "||" separators (used as the title year), and
# the contents of the following {{...}} template (counted as the country).
# Raw string so the backslash escapes reach the regex engine unchanged.
regex = r".*?(\d{4})-\d{2}-\d{2}.*?\|\|(\d{4})\|\|\{\{(.*?)\}\}"
dates = re.findall(regex, grandmasters)

born_after = 1945
# Keep players born after `born_after`; ages of 5 or less are dropped,
# presumably because they come from mismatched rows.
ages = [
    title_year - birth_year
    for birth_year, title_year in ((int(date[0]), int(date[1])) for date in dates)
    if title_year - birth_year > 5 and birth_year > born_after
]
if not ages:
    # Without this guard the unpacking of zip(...) below fails with an
    # unhelpful "not enough values to unpack" error.
    raise SystemExit("No grandmaster records matched; the article markup may have changed.")

age_counts = Counter(ages)
age_average = np.mean(ages)
countries = Counter(date[2] for date in dates)

# Sort by age so the bars run left to right in increasing age rather than
# in the order ages were first encountered.
labels, values = zip(*sorted(age_counts.items()))
indexes = np.arange(len(labels))
width = 1
plt.bar(indexes, values, width)
plt.xlabel("Reached GM at age")
plt.ylabel("Number of players")
plt.xticks(indexes, labels, rotation='vertical', size=8)
plt.yticks(np.arange(0, 121, 10))
# Display the figure; otherwise the chart is built but never rendered.
plt.show()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment