Last active
February 6, 2017 16:29
-
-
Save jul/1c7b058476481fe58fa35eb2363621ea to your computer and use it in GitHub Desktop.
Êtes vous plutôt banlieue ou pas?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import pylab as P | |
import os | |
import readline | |
import atexit | |
from pyquery import PyQuery as S | |
import re | |
from json import load, dump, dumps | |
# Session state: on-disk locations (in the user's home directory) of the
# readline history and of the JSON file holding already-fetched data, plus the
# in-memory cache itself. Only the history file is registered here to be
# written when the interpreter exits.
histfile, datafile = (
    os.path.join(os.path.expanduser("~"), leaf)
    for leaf in (".comparateur_hist", ".comparateur.data")
)
cache = dict()
atexit.register(readline.write_history_file, histfile)
def save():
    """Persist the session to disk.

    Writes the readline history to ``histfile``, then overwrites ``datafile``
    with the JSON serialisation of the module-level ``cache``.
    """
    readline.write_history_file(histfile)
    with open(datafile, "w") as sink:
        dump(cache, sink)
# Restore the previous session. The history file and the data file are read
# independently: a missing history file must not prevent the cached data from
# being loaded, and a missing data file must not skip the history-length cap.
try:
    readline.read_history_file(histfile)
except IOError:
    # No readable history file (e.g. first run): start with an empty history.
    pass
try:
    with open(datafile, "rt") as df:
        cache = load(df)
except IOError:
    # No readable data file (e.g. first run): keep whatever cache is in place.
    pass
# default history len is -1 (infinite), which may grow unruly
readline.set_history_length(1000)
# Completion: bind the TAB key to readline's "complete" action.
readline.parse_and_bind("tab: complete")
def choose_in_cache(text, state):
    """Readline completer: propose cached city names containing *text*.

    The comparison is case-insensitive and matches *text* anywhere in the
    name, not only at the start.

    text  -- the fragment being completed.
    state -- index of the candidate requested by readline (0, 1, 2, ...).

    Returns the *state*-th matching key of the module-level ``cache``, or
    None once *state* is past the last match.
    """
    needle = text.lower()
    matches = [name for name in cache if needle in name.lower()]
    if state >= len(matches):
        return None
    return matches[state]
# Register choose_in_cache as the function readline calls to get completions.
readline.set_completer(choose_in_cache)
# Data and misc utilities
def flatten(l):
    """Return one flat list with the items of every sub-iterable of *l*, in order."""
    return [item for sublist in l for item in sublist]


# Matches a label that contains a parenthesised number, e.g. "Sceaux (92330)".
has_zip_code = re.compile(r".*(\(\d+\))").match
# Matches text starting with a percentage written with a decimal comma and
# exactly two decimals, e.g. "11,37 %".
is_percentage = re.compile(r"(\d+,\d\d %)").match
# Edges of the income brackets (presumably in euros -- confirm with the source site).
bins = [0, 10000, 12000, 15000, 20000, 30000, 50000, 100000, 150000]
# Lower and upper edge of every bracket, interleaved: plotting each bracket
# value twice against these x positions draws a flat step per bracket.
pbins = flatten(zip(bins[:-1], bins[1:]))
# Reference distributions, one percentage per bracket (len(bins) - 1 values).
Beaumont = [22.26, 5.84, 10.67, 17.34, 20.48, 16.17, 6.58, 0.66]
Sceaux = [11.37, 2.78, 4.20, 9.53, 17.15, 19.47, 22.24, 13.25]
# Main loop | |
while True: | |
zip_code_or_name = raw_input(""" | |
Comparateur de revenus fiscaux | |
Rentrez un code postal ou un nom de ville (- pour les espaces) | |
Tapper une partie du nom ou code postal puis TAB pour voir ce dont le cache dispose. Si un seul choix est possible, la completion sera automatique | |
q ou Q pour quitter (attention tous les graphs disparaissent à la sortie) | |
? """) | |
zip_code_or_name=unicode(zip_code_or_name) | |
if zip_code_or_name.upper()=='Q': | |
print "Saving history...." | |
save() | |
break | |
city=None | |
## If not in cache then fetch choices on query andd parse with pyquery | |
if not zip_code_or_name in cache: | |
choose= S(url="http://www.journaldunet.com/economie/impots/recherche?q=%s" % zip_code_or_name) | |
choose.make_links_absolute() | |
choices = choose(".odSquareList") | |
choices = S("li > a", choices) | |
choices = [ c for c in choices if has_zip_code(c.text) ] | |
print "\n".join(["%3d %s" % (i,c.text) for i,c in enumerate(choices) ] ) | |
line = len(choices) + 1 | |
try: | |
while line > len(choices) : | |
line = input("Entrez un numero de ligne\n? ") | |
except Exception as e: | |
print "I suppose no choices you like" | |
continue | |
url = S(choices[line]).attr("href") | |
print "Les donnes brutes sont ici :%s" % url | |
city = choices[line].text | |
else: | |
city = zip_code_or_name | |
if city in cache: | |
print "cache hit : on reprend les données prés sauvées" | |
val = cache[city] | |
else: | |
Input=S(url=url) | |
try: | |
table = S(".odTable tbody", Input) | |
except Exception as e: | |
print table.text | |
print "web page does not have the table I expect" | |
print "Skipping the rest" | |
continue | |
val = [] | |
#TODO : porcasse selection faire plus propre (on prend tout ce qui ressemble a un %) | |
try: | |
for i,td in enumerate(S("td",table)): | |
data = td.text | |
if is_percentage(data): | |
data = data.replace("%","") | |
data= data.replace(",",".") | |
data= data.strip() | |
val +=[ float(data) ] | |
except Exception as e: | |
print table.text() | |
print "web page does not have the table I expect" | |
if len(val) != len(Beaumont): | |
print "pas assez de donnees, cette version sait pas faire" | |
continue | |
cache[city] = val | |
save() | |
# on déteste tous matplotlib (moche) mais j'avais pas envie de faire beau | |
P.figure() | |
def mplot(y, *a , **kw): | |
global P, pbins | |
return P.plot(pbins, flatten(zip(y, y)), *a , **kw) | |
mplot( Beaumont,'k--', label="Beaumont (banlieue)") | |
mplot( Sceaux, 'r:', label="Sceaux (ville ideale)") | |
mplot( val, label = city) | |
P.title(u"Repartition des revenus fiscaux par tranches\nImpôts 2014 source http://www.journaldunet.com/") | |
P.legend(loc='upper center', bbox_to_anchor=(0.5, 0.1), fancybox=True, shadow=True) | |
P.show(block=False) | |
save() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment