Skip to content

Instantly share code, notes, and snippets.

@OpesMentis
Created March 8, 2020 17:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save OpesMentis/30f13628d6fd25390de5d98146d794dd to your computer and use it in GitHub Desktop.
Save OpesMentis/30f13628d6fd25390de5d98146d794dd to your computer and use it in GitHub Desktop.
import requests
from lxml import html
import json
# Build the list of department codes used in the site's URLs.
# Numeric codes covered: 1-95, 971-976 and 988.
l_dpts = list(range(1, 96)) + list(range(971, 977)) + [988]

# Every code is zero-padded to three characters. Code 20 is not emitted as
# '020'; it is replaced by the two codes '02A' and '02B' (presumably the
# 2A/2B split of Corsica). Testing the code itself, rather than slicing the
# list at a fixed index, keeps this correct even if l_dpts is reordered.
departements = []
for d in l_dpts:
    if d == 20:
        departements += ['02A', '02B']
    else:
        departements.append(f'{d:03d}')
base_url = 'https://elections.interieur.gouv.fr/municipales-2020/'

# Walk through the departments to collect the initials listed on each
# department index page.
# l_initiales maps a department code to the list of link texts found under
# base_path on '<code>/index.html'.
l_initiales = {}
base_path = '/html/body/div/div[2]/div[1]/div[3]/div/a'
# A single session is shared by all requests of this loop so the HTTP
# connection to the server is reused instead of reopened for every page.
with requests.Session() as session:
    for d in departements:
        url = base_url + f'{d}/index.html'
        # Without a timeout a stalled connection would block the script forever.
        page = session.get(url, timeout=30)
        tree = html.fromstring(page.content)
        # Keep the first text node of every matching link, in document order.
        l_initiales[d] = [a.xpath('text()')[0] for a in tree.xpath(base_path)]
print('Parcours des départements OK !')
# Walk through the communes.
# villes maps department code -> commune code -> {'nom': ..., 'mode': ...}.
villes = {}
base_path = '/html/body/div/div[2]/div[1]/div[4]/div/table[1]/tbody/tr'
# A single session is shared by all requests of this section so the HTTP
# connection is reused; every request carries a timeout so that a stalled
# connection cannot block the script forever.
with requests.Session() as session:
    for d in departements:
        villes[d] = {}
        for lettre in l_initiales[d]:
            url = base_url + f'{d}/{d + lettre}.html'
            page = session.get(url, timeout=30)
            tree = html.fromstring(page.content)
            for row in tree.xpath(base_path):
                ville = row.xpath('td/text()')[0]
                mode = row.xpath('td[2]/a/text()')[0]
                # Commune code = file name of the link with its first two
                # and last five characters removed (the URLs built below
                # have the form 'C1<code>.html').
                code = row.xpath('td[2]/a/@href')[0].split('/')[-1][2:-5]
                villes[d][code] = {'nom': ville, 'mode': mode}
        print(d + ' : OK !')

    # Special handling of Paris, Marseille and Lyon.
    # Each sector of these cities is treated as a "normal" commune voting
    # by lists.
    for city_code in ['013055', '069123', '075056']:
        dept = city_code[:3]
        url = base_url + f'{dept}/C1{city_code}.html'
        page = session.get(url, timeout=30)
        tree = html.fromstring(page.content)
        for row in tree.xpath(base_path):
            secteur = row.xpath('td/a/text()')[0]
            code = row.xpath('td/a/@href')[0].split('/')[-1][2:-5]
            villes[dept][code] = {'nom': secteur, 'mode': 'Listes'}
        # Remove the city-wide entry, if the department listing produced
        # one, so that only the sectors remain. pop() with a default treats
        # the three cities alike and does not fail when the entry is absent.
        villes[dept].pop(city_code, None)
print('Parcours des villes OK !')
# Walk through the lists / candidates of every commune and write the result
# to 'candidats-municipales-2020.json' as a single JSON object keyed by
# department code.
base_path = '/html/body/div/div[2]/div[1]/div[4]/div/table/tbody/tr'


def _fetch_tree(session, url):
    """Download *url* with *session* and return its content as an lxml tree."""
    # Without a timeout a stalled connection would block the script forever.
    page = session.get(url, timeout=30)
    return html.fromstring(page.content)


def _scrape_listes(session, tree, commune_code):
    """Return the lists found on a list-vote commune page, keyed by list code.

    *tree* is the parsed page of the commune. Each value is a dict with the
    list name ("nom"), its nuance ("nuance", 'XXX' when the table has no
    nuance column) and the names read from the list's own page ("candidats").
    """
    listes = {}
    rows = tree.xpath(base_path)
    if not rows:
        return listes
    # The first row is treated as the header: three <th> cells mean that a
    # nuance column is present. Computed once, not once per row.
    is_nuance = len(rows[0].xpath('th')) == 3
    for row in rows[1:]:
        liste = row.xpath('td[1]/a/text()')[0]
        # List code = the four characters that precede the five-character
        # suffix of the link (presumably '.html').
        code = row.xpath('td[1]/a/@href')[0][-9:-5]
        nuance = row.xpath('td[2]/text()')[0] if is_nuance else 'XXX'
        url_candidats = base_url + f'{commune_code[:3]}/C1{commune_code + code}.html'
        tree_candidats = _fetch_tree(session, url_candidats)
        # Skip the first row; keep the second text node of the first cell.
        candidats = [
            r.xpath('td[1]/text()')[1]
            for r in tree_candidats.xpath(base_path)[1:]
        ]
        listes[code] = {"nom": liste, "nuance": nuance, "candidats": candidats}
    return listes


def _scrape_candidats(tree):
    """Return the text of every cell of every row of a majority-vote page."""
    candidats = []
    for row in tree.xpath(base_path):
        candidats += row.xpath('td/text()')
    return candidats


# The file is written as UTF-8 explicitly because json.dumps is called with
# ensure_ascii=False and therefore emits non-ASCII characters as-is.
# One session is shared by all requests so the HTTP connection is reused.
with open('candidats-municipales-2020.json', 'w', encoding='utf-8') as f_out, \
        requests.Session() as session:
    f_out.write('{')
    for index, d in enumerate(departements):
        data = villes[d]
        for c, commune in data.items():
            tree = _fetch_tree(session, base_url + f'{c[:3]}/C1{c}.html')
            if commune["mode"] == 'Listes':
                commune["listes"] = _scrape_listes(session, tree, c)
            elif commune["mode"] == 'Candidats (Scrutin majoritaire)':
                commune["candidats"] = _scrape_candidats(tree)
        s = json.dumps(data, ensure_ascii=False)
        # The separator goes before every entry except the first: a comma
        # after the last entry would make the file invalid JSON.
        separator = ',' if index else ''
        f_out.write(f'{separator}"{d}": {s}')
        print(d + ' : OK !')
    f_out.write('}')
print('Parcours des listes OK !')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment