Skip to content

Instantly share code, notes, and snippets.

@jul
Last active February 6, 2017 16:29
Show Gist options
  • Save jul/1c7b058476481fe58fa35eb2363621ea to your computer and use it in GitHub Desktop.
Save jul/1c7b058476481fe58fa35eb2363621ea to your computer and use it in GitHub Desktop.
Êtes-vous plutôt banlieue ou pas ?
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import atexit
import os
import re
import readline
import sys
import urllib
from json import load, dump, dumps

import pylab as P
from pyquery import PyQuery as S
# Readline / startup configuration: where the prompt history and the cache of
# fetched data are stored. The history (only) is also written back
# automatically when the interpreter exits.
cache = {}
histfile = os.path.expanduser(os.path.join("~", ".comparateur_hist"))
datafile = os.path.expanduser(os.path.join("~", ".comparateur.data"))
atexit.register(readline.write_history_file, histfile)
def save():
    """Write the readline history and the cache of fetched data to disk.

    The history goes to ``histfile``; ``cache`` is serialised as JSON into
    ``datafile``, replacing whatever that file contained before.
    """
    readline.write_history_file(histfile)
    with open(datafile, "w") as out:
        out.write(dumps(cache))
# Restore state from previous runs. The history and the cached data are
# loaded independently, so that a missing or unreadable file of one kind
# neither prevents the other from being restored nor skips the history limit.
try:
    readline.read_history_file(histfile)
except IOError:
    pass  # no history file yet (e.g. first run)
# default history len is -1 (infinite), which may grow unruly
readline.set_history_length(1000)
try:
    with open(datafile, "rt") as df:
        cache = load(df)
except (IOError, ValueError):
    # No data file yet, or its content is not valid JSON: keep the cache as
    # it is rather than aborting the whole script at startup.
    pass
# Completion: TAB proposes the names that are already stored in the cache.
def choose_in_cache(text, state):
    """Readline completer returning the cached names that contain *text*.

    The comparison ignores case and matches anywhere inside a name.
    *state* is the index of the wanted candidate; ``None`` is returned once
    *state* runs past the last candidate.
    """
    matches = [name for name in cache if text.lower() in name.lower()]
    if state >= len(matches):
        return None
    return matches[state]


readline.set_completer(choose_in_cache)
readline.parse_and_bind("tab: complete")
# Data and misc utilities
def flatten(l):
    """Return one list holding the items of every sub-iterable of *l*, in order."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat


# Matches a label that contains a parenthesised number, e.g. "Name (12345)".
has_zip_code = re.compile(r".*(\(\d+\))").match
# Matches text that starts with a figure written like "12,34 %".
is_percentage = re.compile(r"(\d+,\d\d %)").match
# Bracket boundaries (presumably euros of income -- confirm); consecutive
# boundaries delimit one bracket, so there is one bracket less than boundaries.
bins = [0, 10000, 12000, 15000, 20000, 30000, 50000, 100000, 150000]
# Low and high edge of every bracket laid out flat: the x coordinates used to
# draw one horizontal segment per bracket.
pbins = flatten(zip(bins, bins[1:]))
# Reference distributions plotted next to every requested town: one
# percentage per bracket.
Beaumont = [22.26, 5.84, 10.67, 17.34, 20.48, 16.17, 6.58, 0.66]
Sceaux = [11.37, 2.78, 4.20, 9.53, 17.15, 19.47, 22.24, 13.25]
# Main loop: ask for a town, get its distribution (from the cache or from the
# web site), and plot it against the two reference towns.
#
# NOTE: single-argument print(...) is used throughout; under Python 2 it
# behaves exactly like the print statement.
PROMPT = """
Comparateur de revenus fiscaux
Rentrez un code postal ou un nom de ville (- pour les espaces)
Tapper une partie du nom ou code postal puis TAB pour voir ce dont le cache dispose. Si un seul choix est possible, la completion sera automatique
q ou Q pour quitter (attention tous les graphs disparaissent à la sortie)
? """


def mplot(y, *a, **kw):
    """Plot the distribution *y* as one horizontal segment per bracket.

    Every value of *y* is duplicated so that it is drawn at both edges of its
    bracket in ``pbins``. Extra arguments are forwarded to ``P.plot``, whose
    result is returned.
    """
    return P.plot(pbins, flatten(zip(y, y)), *a, **kw)


def _ask_city(query):
    """Search the web site for *query* and let the user pick one result.

    Returns a ``(city, url)`` pair, or ``None`` when the search found nothing
    or the user did not enter a line number. Errors raised while fetching the
    result page are left to the caller.
    """
    # Percent-encode the query so spaces and accented letters give a valid URL.
    # NOTE(review): assumes the site accepts UTF-8 encoded queries -- confirm.
    search = S(url="http://www.journaldunet.com/economie/impots/recherche?q=%s"
               % urllib.quote(query.encode("utf-8")))
    search.make_links_absolute()
    # Keep only the links whose label carries a "(number)"; links without any
    # text are skipped instead of crashing the regex.
    choices = [c for c in S("li > a", search(".odSquareList"))
               if c.text and has_zip_code(c.text)]
    if not choices:
        print("Aucune ville trouvee pour cette recherche")
        return None
    print("\n".join(["%3d %s" % (i, c.text) for i, c in enumerate(choices)]))
    line = -1
    try:
        # Ask again until the number designates one of the listed rows.
        # int(raw_input()) replaces input(), which evaluated the typed text.
        while not 0 <= line < len(choices):
            line = int(raw_input("Entrez un numero de ligne\n? "))
    except (ValueError, EOFError):
        # Anything that is not a number means the user gives up on this list.
        print("I suppose no choices you like")
        return None
    return choices[line].text, S(choices[line]).attr("href")


def _fetch_distribution(url):
    """Download *url* and return the percentages of its data table as floats.

    Raises whatever the download raises, and ``ValueError`` when a cell that
    looks like a percentage cannot be converted.
    """
    table = S(".odTable tbody", S(url=url))
    values = []
    # TODO: crude selection, make it cleaner (every cell that looks like a
    # percentage is taken).
    for td in S("td", table):
        data = td.text
        # Cells without direct text have text None; they cannot be a figure.
        if data and is_percentage(data):
            values.append(float(data.replace("%", "").replace(",", ".").strip()))
    return values


while True:
    try:
        zip_code_or_name = raw_input(PROMPT)
    except EOFError:
        # End of input (e.g. Ctrl-D) is treated as a request to quit.
        zip_code_or_name = "q"
    # Decode with the terminal encoding: a plain unicode() call only accepts
    # ASCII and raised UnicodeDecodeError on accented names.
    zip_code_or_name = zip_code_or_name.decode(
        sys.stdin.encoding or "utf-8", "replace").strip()
    if zip_code_or_name.upper() == 'Q':
        print("Saving history....")
        save()
        break
    if not zip_code_or_name:
        continue
    ## If not in cache then fetch choices on query and parse with pyquery
    if zip_code_or_name in cache:
        city = zip_code_or_name
    else:
        try:
            picked = _ask_city(zip_code_or_name)
        except Exception as e:
            # Interactive boundary: report the failed search and ask again.
            print("recherche impossible : %s" % e)
            continue
        if picked is None:
            continue
        city, url = picked
        print("Les donnes brutes sont ici :%s" % url)
    if city in cache:
        print("cache hit : on reprend les données prés sauvées")
        val = cache[city]
    else:
        try:
            val = _fetch_distribution(url)
        except Exception as e:
            print(e)
            print("web page does not have the table I expect")
            print("Skipping the rest")
            continue
        # One percentage per bracket is required to draw the curve.
        if len(val) != len(Beaumont):
            print("pas assez de donnees, cette version sait pas faire")
            continue
        cache[city] = val
        save()
    # matplotlib is ugly, but I did not feel like making it pretty
    P.figure()
    mplot(Beaumont, 'k--', label="Beaumont (banlieue)")
    mplot(Sceaux, 'r:', label="Sceaux (ville ideale)")
    mplot(val, label=city)
    P.title(u"Repartition des revenus fiscaux par tranches\nImpôts 2014 source http://www.journaldunet.com/")
    P.legend(loc='upper center', bbox_to_anchor=(0.5, 0.1), fancybox=True, shadow=True)
    # Non-blocking, so the prompt comes back while the figures stay open.
    P.show(block=False)
    save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment