Skip to content

Instantly share code, notes, and snippets.

@Klafyvel
Created April 29, 2020 08:39
Show Gist options
  • Save Klafyvel/befccef4adb30658bed2539651caaea4 to your computer and use it in GitHub Desktop.
Save Klafyvel/befccef4adb30658bed2539651caaea4 to your computer and use it in GitHub Desktop.
import re
import pathlib
import csv
import requests
from bs4 import BeautifulSoup
# Pattern applied to an establishment cell: some leading text, then a
# parenthesised code. The named group `dep` captures the first three
# characters of that code when they are "0" + two digits or "97" + one digit
# (presumably the French department prefix -- confirm against the SCEI page).
reg = re.compile(r"[\w\séàèâäîï]+\((?P<dep>((0[0-9][0-9])|(97[0-9])))\w+\)")
# Header row written at the top of every generated CSV file.
# NOTE: do_bank() overwrites the second cell of each data row with the value
# of the `dep` group, so the "Etablissement" column ends up holding that code
# rather than the establishment name.
FIRST_LINE = [
"Ville",
"Etablissement",
"Inscrits",
"Inscritsfilles",
"Admissibles",
"Admissiblesfilles",
"Classés",
"Classésfilles",
"Intégrés",
"Intégrésfilles"
]
# URL template; url() fills both placeholders with the same bank code.
URL_BASE = "https://www.scei-concours.fr/stat2019/lycee_{}/cs_{}.html"
# Absolute path of the directory containing this script; do_bank() writes
# its CSV files there.
DIR = pathlib.Path(__file__).parent.absolute()
# Bank codes iterated over by the driver loop at the bottom of the script.
BANKS = (
'mp',
'pc',
'pt',
'psi',
'tsi'
)
def url(bank):
    """Build the statistics page URL for ``bank``.

    The bank code fills both placeholders of ``URL_BASE``.
    """
    lycee_part = cs_part = bank
    return URL_BASE.format(lycee_part, cs_part)
def page(bank, timeout=30):
    """Download the statistics page for ``bank`` and return its HTML text.

    Parameters:
        bank: bank code, passed to ``url()`` to build the address.
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the whole script.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within ``timeout``.
    """
    response = requests.get(url(bank), timeout=timeout)
    response.raise_for_status()
    return response.text
def soupify(page):
    """Parse the HTML string ``page`` using the ``html.parser`` backend."""
    parsed = BeautifulSoup(page, features="html.parser")
    return parsed
def rows(soup):
    """Return every ``tr`` element found in ``soup``."""
    table_rows = soup.find_all('tr')
    return table_rows
def cells(row):
    """Return every ``td`` element found in ``row``."""
    data_cells = row.find_all('td')
    return data_cells
def heads(row):
    """Return every ``th`` element found in ``row``."""
    header_cells = row.find_all('th')
    return header_cells
def content(cell):
    """Return the ``text`` attribute of ``cell``."""
    cell_text = cell.text
    return cell_text
def find_dep(cell_dep):
    """Return the ``dep`` group that ``reg`` finds in ``cell_dep``.

    Raises:
        TypeError: when ``reg`` does not match, because the ``None`` result
            of ``search`` is subscripted. ``do_bank`` catches this to skip
            the row.
    """
    found = reg.search(cell_dep)
    return found["dep"]
def do_bank(bank):
    """Scrape the statistics table for ``bank`` into ``bank_<bank>.csv``.

    The file is written next to this script (``DIR``). Its first line is
    ``FIRST_LINE``; each following line is one table row whose second cell
    has been replaced by the code returned by ``find_dep``. Rows for which
    no code can be extracted are dropped.

    Parameters:
        bank: bank code, used both to build the URL and the file name.

    Raises:
        requests.RequestException: if the page cannot be downloaded. This
            happens before the output file is opened, so an existing CSV is
            not truncated by a failed download.
    """
    # Download and parse first: a network failure must not leave an empty
    # or half-written CSV behind.
    soup_rows = rows(soupify(page(bank)))

    # newline='' is what the csv module expects for files it writes to;
    # the explicit encoding keeps the accented header names readable
    # regardless of the platform default.
    with open(DIR / ("bank_%s.csv" % bank), 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # The header row of the SCEI table is malformed, so a fixed header
        # is written instead of one scraped from the page.
        writer.writerow(FIRST_LINE)
        # The first <tr> is the (skipped) header row; the rest is data.
        for row in soup_rows[1:]:
            texts = list(map(content, cells(row)))
            try:
                texts[1] = find_dep(texts[1])
            except TypeError:
                # No code matched: that's not a French prep school.
                continue
            except IndexError:
                # Row with fewer than two cells: nothing to extract.
                continue
            writer.writerow(texts)
# Driver: produce one CSV file per bank listed in BANKS.
for bank in BANKS:
    do_bank(bank)
using CSV
using DataFrames
using StatsPlots
# Banks to process; each one is expected to have a "bank_<name>.csv" file in
# the working directory.
BANKS = ["mp" "pc" "pt" "psi" "tsi"]
# One DataFrame per bank, keyed by bank name. CSV.read needs an explicit
# sink type: calling it with only a path is rejected by current CSV.jl.
data = Dict(bank => CSV.read("bank_"*bank*".csv", DataFrame) for bank in BANKS);
# Department codes counted as "Paris" by do_bank. Deliberately kept as a
# 1×N row: do_bank concatenates it horizontally with scalars.
PARIS = [ 75 77 78 91 92 93 94 95 ];
"""
    do_bank(bank)

Draw and save a stacked bar chart of the `Intégrés` counts in `data[bank]`,
split into men and women, and return the plot.

The bars are, in order: the overall total, everything outside `PARIS`
("Autre"), then one bar per department code listed in `PARIS`. The
`Etablissement` column is compared against the codes in `PARIS`, so it is
expected to hold department codes. Men are computed as
`Intégrés - Intégrésfilles`.

The chart is written to `integres_<bank>_2019.png` in the working directory.
"""
function do_bank(bank)
    df = data[bank]
    departments = df[:, :Etablissement]
    women_counts = df[:, :Intégrésfilles]
    men_counts = df[:, :Intégrés] .- women_counts

    # Flatten so the code below does not depend on PARIS being a row matrix.
    paris_deps = vec(PARIS)
    elsewhere = .!in.(departments, Ref(paris_deps))

    men_from_elsewhere = sum(men_counts[elsewhere])
    women_from_elsewhere = sum(women_counts[elsewhere])

    # Build each department mask once and reuse it for both genders.
    dep_masks = [departments .== dep for dep in paris_deps]
    men_from_paris = [sum(men_counts[mask]) for mask in dep_masks]
    women_from_paris = [sum(women_counts[mask]) for mask in dep_masks]

    total_men = men_from_elsewhere + sum(men_from_paris)
    total_women = women_from_elsewhere + sum(women_from_paris)

    deps = ["Total"; "Autre"; string.(paris_deps)]
    men = [total_men; men_from_elsewhere; men_from_paris]
    women = [total_women; women_from_elsewhere; women_from_paris]

    people_from_paris = sum(men_from_paris) + sum(women_from_paris)
    total_people = people_from_paris + men_from_elsewhere + women_from_elsewhere
    # Guard the empty-table case so the title never shows "NaN %".
    prop = total_people == 0 ? 0.0 :
        round(people_from_paris / total_people * 100, digits=2)
    title = "Banque $bank integres promo 2019 CS\nIssus de prepas francaises\nProportion issus de prepa parisienne : $(string(prop)) %"

    # Keep a handle on the plot instead of relying on the implicit
    # "current plot" for saving and returning.
    plt = groupedbar(
        deps,
        [men women];
        bar_position=:stack,
        bar_width=0.7,
        labels=["Hommes" "Femmes"],
        xlabel="Departement de Prepa",
        ylabel="Nombre d'integres",
        title=title,
    )
    savefig(plt, "integres_$(bank)_2019.png")
    return plt
end
# Build (and save) one chart per bank.
plots = [
    do_bank(bank) for bank in BANKS
]
# Combine the charts into a single figure. Each plot has to be passed as its
# own argument: an array of plots would be treated as series data.
plot(plots...)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment