Created
April 29, 2020 08:39
-
-
Save Klafyvel/befccef4adb30658bed2539651caaea4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import pathlib | |
import csv | |
import requests | |
from bs4 import BeautifulSoup | |
# Pattern for a cell shaped like "<words> (<code>)". The named group `dep`
# captures the three digits that open the parenthesised code: either 0 followed
# by two digits, or 97 followed by one digit.
reg = re.compile(r"[\w\séàèâäîï]+\((?P<dep>((0[0-9][0-9])|(97[0-9])))\w+\)")

# Fixed header row written at the top of every generated CSV file.
FIRST_LINE = [
    "Ville", "Etablissement",
    "Inscrits", "Inscritsfilles",
    "Admissibles", "Admissiblesfilles",
    "Classés", "Classésfilles",
    "Intégrés", "Intégrésfilles",
]

# URL template; both placeholders receive the bank name (see `url`).
URL_BASE = "https://www.scei-concours.fr/stat2019/lycee_{}/cs_{}.html"

# Absolute path of the directory holding this script; output files go there.
DIR = pathlib.Path(__file__).parent.absolute()

# Bank names substituted into URL_BASE and into the output file names.
BANKS = ("mp", "pc", "pt", "psi", "tsi")
def url(bank):
    """Return the page address for `bank`, filling both slots of URL_BASE with it."""
    lycee_slot = cs_slot = bank
    return URL_BASE.format(lycee_slot, cs_slot)
def page(bank, timeout=30):
    """Download the page for `bank` and return its body as text.

    Parameters:
        bank: bank name, forwarded to `url` to build the address.
        timeout: seconds handed to `requests.get`. Without a timeout the call
            can block indefinitely when the server stops answering.

    Raises:
        requests.HTTPError: via `raise_for_status()` on a 4xx/5xx response.
        requests.Timeout: when the server does not answer within `timeout`.
    """
    response = requests.get(url(bank), timeout=timeout)
    response.raise_for_status()
    return response.text
def soupify(page):
    """Parse the markup `page` with the "html.parser" backend and return the soup."""
    parsed = BeautifulSoup(page, "html.parser")
    return parsed
def rows(soup):
    """Return every 'tr' element found under `soup`."""
    table_rows = soup.find_all('tr')
    return table_rows
def cells(row):
    """Return every 'td' element found under `row`."""
    data_cells = row.find_all('td')
    return data_cells
def heads(row):
    """Return every 'th' element found under `row`."""
    header_cells = row.find_all('th')
    return header_cells
def content(cell):
    """Return the `text` attribute of `cell`."""
    cell_text = cell.text
    return cell_text
def find_dep(cell_dep):
    """Return the `dep` group of the first match of `reg` in `cell_dep`.

    Raises TypeError when nothing matches: the search result is then None and
    subscripting None fails. `do_bank` catches that TypeError to drop the row.
    """
    found = reg.search(cell_dep)
    # Subscript on purpose (not .group): None["dep"] is what raises TypeError.
    return found["dep"]
def do_bank(bank):
    """Scrape the table for `bank` and write it to DIR / "bank_<bank>.csv".

    The CSV starts with FIRST_LINE as header. The first table row is skipped;
    for every other row the second cell is replaced by the code extracted by
    `find_dep`. Rows where `find_dep` finds no code (TypeError), or that have
    fewer than two cells, are dropped.

    Raises whatever `page` raises when the download fails; in that case no
    file is created or truncated.
    """
    # Download and parse before touching the output file, so a failed request
    # does not leave an empty CSV behind (or wipe a previous good one).
    soup_rows = rows(soupify(page(bank)))

    # `with` closes the file even if a row blows up. newline='' is what the csv
    # module expects for its file objects, and an explicit UTF-8 encoding keeps
    # the accented header names independent of the platform locale.
    with open(DIR / ("bank_%s.csv" % bank), 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)

        # The header is hard-coded: the page's own header row is not usable
        # because the SCEI html is broken.
        writer.writerow(FIRST_LINE)

        # data
        for row in soup_rows[1:]:
            texts = list(map(content, cells(row)))
            if len(texts) < 2:
                # No second cell to extract a code from (e.g. a row without
                # 'td' cells): not a data row, skip instead of IndexError.
                continue
            try:
                texts[1] = find_dep(texts[1])
            except TypeError:  # that's not a french prep school
                continue
            writer.writerow(texts)
# Script entry point: process each bank listed in BANKS in order, producing one
# CSV file per bank.
for bank in BANKS:
    do_bank(bank)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CSV | |
using DataFrames | |
using StatsPlots | |
# Bank names; each one has a matching "bank_<name>.csv" file. The space-separated
# literal makes this a 1×5 row matrix, which is left as is.
BANKS = ["mp" "pc" "pt" "psi" "tsi"]

# One DataFrame per bank, keyed by bank name. The file is materialised through
# `CSV.File` because current CSV.jl no longer accepts the one-argument
# `CSV.read(path)` form (a sink argument is required there).
data = Dict(bank => DataFrame(CSV.File("bank_" * bank * ".csv")) for bank in BANKS);

# Department numbers treated as the Paris area. This must stay a 1×8 row matrix:
# `do_bank` horizontally concatenates scalars with values derived from it.
PARIS = [75 77 78 91 92 93 94 95];
"""
    do_bank(bank)

Draw and save the stacked bar chart for `bank`.

Reads the globals `data` (the table for `bank`) and `PARIS`. Rows are split on
whether their `:Etablissement` value is one of the `PARIS` numbers. For each
group the women count is the sum of `:Intégrésfilles` and the men count is the
sum of `:Intégrés` minus `:Intégrésfilles`. The bars are "Total", "Autre" and
one per entry of `PARIS`.

The figure is saved as `integres_<bank>_2019.png`, and the current plot (as
returned by `plot!()`) is returned.
"""
function do_bank(bank)
    table = data[bank]
    dept = table[:, :Etablissement]
    girls = table[:, :Intégrésfilles]
    boys = table[:, :Intégrés] - girls

    # Row masks: inside vs. outside the PARIS list.
    in_paris = in.(dept, Ref(PARIS))
    outside = .!in_paris

    men_from_elsewhere = sum(boys[outside])
    women_from_elsewhere = sum(girls[outside])

    # One count per entry of PARIS, in the same order (and shape) as PARIS.
    men_from_paris = [sum(boys[dept .== dep]) for dep in PARIS]
    women_from_paris = [sum(girls[dept .== dep]) for dep in PARIS]

    total_men = men_from_elsewhere + sum(men_from_paris)
    total_women = women_from_elsewhere + sum(women_from_paris)

    # Bar order: grand total, everything outside PARIS, then each PARIS entry.
    deps = vcat("Total", "Autre", vec(string.(PARIS)))
    men = vcat(total_men, men_from_elsewhere, vec(men_from_paris))
    women = vcat(total_women, women_from_elsewhere, vec(women_from_paris))

    people_from_paris = sum(men_from_paris) + sum(women_from_paris)
    total_people = people_from_paris + men_from_elsewhere + women_from_elsewhere
    prop = round(people_from_paris / total_people * 100, digits=2)

    title = "Banque $bank integres promo 2019 CS\nIssus de prepas francaises\nProportion issus de prepa parisienne : $(prop) %"

    groupedbar(
        deps,
        hcat(men, women);
        bar_position=:stack,
        bar_width=0.7,
        labels=["Hommes" "Femmes"],
        xlabel="Departement de Prepa",
        ylabel="Nombre d'integres",
        title=title,
    )
    savefig("integres_$(bank)_2019.png")

    # Hand the freshly drawn (current) plot back to the caller.
    return plot!()
end
# Draw (and save) one chart per bank, keeping the returned plot objects.
plots = [do_bank(bank) for bank in BANKS]

# Combine the charts into one figure. `plot` takes the sub-plots as separate
# arguments, so the collection is splatted; passed as a single array it would
# be treated as series data instead of as plots to lay out.
plot(plots...)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment