-
-
Save OpesMentis/30f13628d6fd25390de5d98146d794dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from lxml import html | |
import json | |
# Build the list of département codes used in the results site's URLs.
# Numeric codes are zero-padded to three characters ('001', '002', ...).
l_dpts = list(range(1, 96)) + list(range(971, 977)) + [988]
departements = []
for d in l_dpts:
    if d == 20:
        # There is no '020' entry: its place is taken by '02A' and '02B'.
        # Substituting by value (rather than by list position) keeps this
        # correct even if the head of l_dpts is ever changed.
        departements.extend(['02A', '02B'])
    else:
        departements.append(f'{d:03d}')

# Root URL under which every page fetched by this script lives.
base_url = 'https://elections.interieur.gouv.fr/municipales-2020/'
# Walk every département index page and collect the initials listed there.
# l_initiales maps a département code to the list of link texts found on its
# index page; each one is later appended to the code to form a page name.
l_initiales = {}
base_path = '/html/body/div/div[2]/div[1]/div[3]/div/a'
for d in departements:
    page = requests.get(f'{base_url}{d}/index.html')
    tree = html.fromstring(page.content)
    # Count the matching links first, then read the first text node of each
    # one through its 1-based XPath position.
    link_count = len(tree.xpath(base_path))
    l_initiales[d] = [
        tree.xpath(f'{base_path}[{position}]/text()')[0]
        for position in range(1, link_count + 1)
    ]
print('Parcours des départements OK !')
# Walk the communes: for every département and every initial, read the table
# rows of the corresponding page.
# villes maps département code -> commune code -> {'nom': ..., 'mode': ...}.
villes = {}
base_path = '/html/body/div/div[2]/div[1]/div[4]/div/table[1]/tbody/tr'
for d in departements:
    communes = villes[d] = {}
    for initial in l_initiales[d]:
        page = requests.get(f'{base_url}{d}/{d + initial}.html')
        tree = html.fromstring(page.content)
        row_count = len(tree.xpath(base_path))
        for position in range(1, row_count + 1):
            row = f'{base_path}[{position}]'
            name = tree.xpath(row + '/td/text()')[0]
            link = row + '/td[2]/a'
            mode = tree.xpath(link + '/text()')[0]
            href = tree.xpath(link + '/@href')[0]
            # Keep the last path component minus its first two and last five
            # characters (presumably the 'C1' prefix and '.html' suffix).
            file_name = href.split('/')[-1]
            communes[file_name[2:-5]] = {'nom': name, 'mode': mode}
    print(d + ' : OK !')
# Special handling for Paris, Marseille and Lyon.
# Each sector of these cities is recorded as a "normal" commune that uses
# list voting ('Listes').
for commune in ('013055', '069123', '075056'):
    dept = commune[:3]
    page = requests.get(f'{base_url}{dept}/C1{commune}.html')
    tree = html.fromstring(page.content)
    # base_path is reused as-is from the commune crawl (table[1] rows).
    for position in range(1, len(tree.xpath(base_path)) + 1):
        link = f'{base_path}[{position}]/td/a'
        secteur = tree.xpath(link + '/text()')[0]
        href = tree.xpath(link + '/@href')[0]
        # Same code extraction as for ordinary communes: last path component
        # without its first two and last five characters.
        code = href.split('/')[-1][2:-5]
        villes[dept][code] = {'nom': secteur, 'mode': 'Listes'}

# Drop the city-wide entries now that their sectors stand in for them.
# NOTE(review): there is no matching removal for '075056' — confirm whether
# that is intentional.
del villes['013']['013055']
del villes['069']['069123']
print('Parcours des villes OK !')
# Crawl the candidate lists and stream the result to a JSON file, with one
# top-level key per département.
base_path = '/html/body/div/div[2]/div[1]/div[4]/div/table/tbody/tr'

# UTF-8 is requested explicitly because the JSON is produced with
# ensure_ascii=False; the context manager closes the file even when a
# request or a parsing step raises.
with open('candidats-municipales-2020.json', 'w', encoding='utf-8') as f_out:
    f_out.write('{')
    for rank, d in enumerate(departements):
        # Shallow copy: the per-commune dicts are shared with villes[d], so
        # the keys added below are visible through villes as well.
        data = villes[d].copy()
        for c in villes[d]:
            url = base_url + f'{c[:3]}/C1{c}.html'
            page = requests.get(url)
            tree = html.fromstring(page.content)

            if data[c]["mode"] == 'Listes':
                data[c]["listes"] = {}
                # The nuance column is read only when the header row has
                # exactly three cells; otherwise 'XXX' is stored instead.
                is_nuance = len(tree.xpath(base_path + '[1]/th')) == 3
                len_listes = len(tree.xpath(base_path))
                # Row 1 is the header row, so the lists start at row 2.
                for i in range(2, len_listes + 1):
                    liste = tree.xpath(base_path + f'[{i}]/td[1]/a/text()')[0]
                    # Four characters preceding the last five of the href
                    # (presumably the list code right before '.html').
                    code = tree.xpath(base_path + f'[{i}]/td[1]/a/@href')[0][-9:-5]
                    if is_nuance:
                        nuance = tree.xpath(base_path + f'[{i}]/td[2]/text()')[0]
                    else:
                        nuance = 'XXX'

                    url_candidats = base_url + f'{c[:3]}/C1{c+code}.html'
                    page_candidats = requests.get(url_candidats)
                    tree_candidats = html.fromstring(page_candidats.content)
                    len_candidats = len(tree_candidats.xpath(base_path))
                    # The second text node of the first cell is kept for
                    # each row after the first.
                    candidats = [
                        tree_candidats.xpath(base_path + f'[{j}]/td[1]/text()')[1]
                        for j in range(2, len_candidats + 1)
                    ]
                    data[c]["listes"][code] = {
                        "nom": liste,
                        "nuance": nuance,
                        "candidats": candidats,
                    }
            elif data[c]["mode"] == 'Candidats (Scrutin majoritaire)':
                data[c]["candidats"] = []
                len_candidats = len(tree.xpath(base_path))
                for i in range(1, len_candidats + 1):
                    # Every text node of every cell in the row is appended.
                    data[c]["candidats"] += tree.xpath(base_path + f'[{i}]/td/text()')

        s = json.dumps(data, ensure_ascii=False)
        # The comma goes *before* every entry except the first: a trailing
        # comma right before the closing '}' would make the file invalid JSON.
        separator = ',' if rank else ''
        f_out.write(f'{separator}"{d}": {s}')
        print(d + ' : OK !')
    print('Parcours des listes OK !')
    f_out.write('}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment