jul/comparateur.py

## comparateur.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pylab as P
import os
import readline
import atexit
from pyquery import PyQuery as S
import re
from json import load, dump, dumps

# Per-user state: two dot-files directly under the home directory, one for
# the readline input history and one for the JSON dump of fetched figures.
histfile, datafile = (
    os.path.join(os.path.expanduser("~"), dot_file)
    for dot_file in (".comparateur_hist", ".comparateur.data")
)
# In-memory store of fetched figures, keyed by the city label.
cache = dict()

# Only the readline history is written by this exit hook.
atexit.register(readline.write_history_file, histfile)

def save():
    """Write the readline history and the city cache to their dot-files.

    The cache is first dumped to a temporary sibling file and then renamed
    over ``datafile``: an interrupted write can no longer leave a truncated
    JSON file that would fail to parse on the next start.
    """
    readline.write_history_file(histfile)
    tmp_path = datafile + ".tmp"
    with open(tmp_path, "w") as f:
        dump(cache, f)
    os.rename(tmp_path, datafile)
# Restore the previous session. The history file and the cache file are
# handled independently: a missing history file must not prevent the cache
# from being loaded, nor the history length from being capped.
try:
    readline.read_history_file(histfile)
except IOError:
    # No history file yet (first run): nothing to restore.
    pass
# default history len is -1 (infinite), which may grow unruly
readline.set_history_length(1000)

try:
    with open(datafile, "rt") as df:
        cache = load(df)
except IOError:
    # No cache file yet (first run): keep the cache as it is.
    pass
except ValueError:
    # The file exists but is not valid JSON: keep the cache as it is
    # rather than crashing at startup.
    print("Ignoring unreadable cache file: %s" % datafile)

# Tab completion over the names already present in the cache.
def choose_in_cache(text, state):
    """readline completer: return the state-th cached name containing *text*.

    The comparison is case-insensitive and matches anywhere in the name.
    Returns None once *state* runs past the last candidate.
    """
    matches = []
    for name in cache:
        if text.lower() in name.lower():
            matches.append(name)
    if state < len(matches):
        return matches[state]
    return None


readline.parse_and_bind("tab: complete")
readline.set_completer(choose_in_cache)

#Data and misc utilities
def flatten(l):
    """Return one flat list holding the items of every sub-iterable of *l*."""
    return [item for sublist in l for item in sublist]


# Raw strings: "\(" and "\d" are regex escapes, not Python string escapes.
# Matches a label containing a parenthesised run of digits, e.g. "Sceaux (92330)".
has_zip_code = re.compile(r".*(\(\d+\))").match
# Matches text starting with a figure such as "12,34 %" (comma as decimal mark).
is_percentage = re.compile(r"(\d+,\d\d %)").match

# Edges of the income brackets (presumably euros - confirm against the source site).
bins = [ 0, 10000, 12000, 15000, 20000, 30000, 50000, 100000, 150000 ]
# (low, high) edge of each bracket laid out flat, so that a value repeated
# twice draws a horizontal step across its bracket.
pbins = flatten(zip(bins[:-1], bins[1:]))
# Reference series drawn on every figure: one value per bracket.
Beaumont = [ 22.26, 5.84, 10.67, 17.34, 20.48, 16.17, 6.58, 0.66, ]
Sceaux = [ 11.37 , 2.78 , 4.20 , 9.53 , 17.15 , 19.47 , 22.24 , 13.25 , ]

# Main loop
import sys  # script-local: only needed to decode what the terminal sends


def mplot(y, *a, **kw):
    """Draw *y* (one value per bracket) as a step curve over ``pbins``.

    Every value is repeated so that it spans both edges of its bracket.
    Extra positional and keyword arguments are passed on to ``P.plot``,
    whose return value is returned unchanged.
    """
    return P.plot(pbins, flatten(zip(y, y)), *a, **kw)


def _ask_line_number(count):
    """Prompt until the answer is an index in ``range(count)``.

    Returns the index, or None when the user gives up (end of input, or
    anything that is not an integer).
    """
    while True:
        try:
            # raw_input + int rather than Python 2 input(), which eval()s
            # whatever is typed.
            line = int(raw_input("Entrez un numero de ligne\n? "))
        except (EOFError, ValueError):
            return None
        # Reject both ends: count itself is out of range, and a negative
        # number would silently index from the end of the list.
        if 0 <= line < count:
            return line


def _percentages(table):
    """Return, in document order, the float value of each percentage cell.

    #TODO : crude selection, do it more cleanly (everything that looks
    like a % is taken).
    """
    values = []
    for td in S("td", table):
        # td.text is None for a cell without leading text; the regex
        # cannot be applied to None.
        found = is_percentage(td.text or "")
        if found:
            number = found.group(1).replace("%", "").replace(",", ".")
            values.append(float(number.strip()))
    return values


while True:
    try:
        zip_code_or_name = raw_input("""
Comparateur de revenus fiscaux

Rentrez un code postal ou un nom de ville (- pour les espaces)
Tapper une partie du nom ou code postal puis TAB pour voir ce dont le cache dispose. Si un seul choix est possible, la completion sera automatique
q ou Q pour quitter (attention tous les graphs disparaissent à la sortie)

? """)
    except EOFError:
        # End of input (Ctrl-D): leave through the same path as "q" so
        # that history and cache are saved.
        zip_code_or_name = "q"
    if isinstance(zip_code_or_name, bytes):
        # unicode(raw) decodes as ASCII and fails on accented names;
        # decode with the terminal's encoding instead.
        zip_code_or_name = zip_code_or_name.decode(sys.stdin.encoding or "utf-8", "replace")
    if zip_code_or_name.upper() == 'Q':
        print("Saving history....")
        save()
        break

    ## If not in cache then fetch choices on query and parse with pyquery
    if zip_code_or_name in cache:
        city = zip_code_or_name
        url = None
    else:
        choose = S(url="http://www.journaldunet.com/economie/impots/recherche?q=%s" % zip_code_or_name)
        choose.make_links_absolute()
        choices = [
            c for c in S("li > a", choose(".odSquareList"))
            if c.text and has_zip_code(c.text)
        ]
        if not choices:
            print("Aucun resultat pour cette recherche")
            continue
        print("\n".join(["%3d  %s" % (i, c.text) for i, c in enumerate(choices)]))
        line = _ask_line_number(len(choices))
        if line is None:
            print("I suppose no choices you like")
            continue
        url = S(choices[line]).attr("href")
        print("Les donnes brutes sont ici :%s" % url)
        city = choices[line].text

    if city in cache:
        print("cache hit : on reprend les données prés sauvées")
        val = cache[city]
    else:
        page = S(url=url)
        table = S(".odTable tbody", page)
        # An unmatched selector gives an empty (falsy) selection, it does
        # not raise: test for it explicitly.
        if not table:
            print("web page does not have the table I expect")
            print("Skipping the rest")
            continue

        val = _percentages(table)
        if len(val) != len(Beaumont):
            print("pas assez de donnees, cette version sait pas faire")
            continue
        cache[city] = val
        save()

    # we all hate matplotlib (ugly) but I did not feel like making it pretty
    P.figure()

    mplot(Beaumont, 'k--', label="Beaumont (banlieue)")
    mplot(Sceaux, 'r:', label="Sceaux (ville ideale)")
    mplot(val, label=city)

    P.title(u"Repartition des revenus fiscaux par tranches\nImpôts 2014 source http://www.journaldunet.com/")
    P.legend(loc='upper center', bbox_to_anchor=(0.5, 0.1), fancybox=True, shadow=True)
    P.show(block=False)
    save()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	# NOTE(review): this copy had lost its indentation and its asterisks
	# (coding cookie, "*a, **kw"); both are restored here.
	import pylab as P
	import os
	import readline
	import atexit
	import sys
	from pyquery import PyQuery as S
	import re
	from json import load, dump, dumps

	# READLINE / init SETUP: readline history + on-disk cache of fetched data
	histfile = os.path.join(os.path.expanduser("~"), ".comparateur_hist")
	datafile = os.path.join(os.path.expanduser("~"), ".comparateur.data")
	# In-memory store of fetched figures, keyed by the city label.
	cache = {}

	# Only the readline history is written by this exit hook.
	atexit.register(readline.write_history_file, histfile)


	def save():
	    """Write the readline history and the city cache to their dot-files.

	    The cache goes to a temporary sibling file that is then renamed over
	    ``datafile``, so an interrupted write cannot leave truncated JSON.
	    """
	    readline.write_history_file(histfile)
	    tmp_path = datafile + ".tmp"
	    with open(tmp_path, "w") as f:
	        dump(cache, f)
	    os.rename(tmp_path, datafile)


	# Restore the previous session; the two files are handled independently
	# so that a missing history file does not prevent loading the cache.
	try:
	    readline.read_history_file(histfile)
	except IOError:
	    # No history file yet (first run).
	    pass
	# default history len is -1 (infinite), which may grow unruly
	readline.set_history_length(1000)

	try:
	    with open(datafile, "rt") as df:
	        cache = load(df)
	except IOError:
	    # No cache file yet (first run): keep the empty cache.
	    pass
	except ValueError:
	    # Not valid JSON: keep the empty cache rather than crash at startup.
	    print("Ignoring unreadable cache file: %s" % datafile)

	#Completion
	readline.parse_and_bind("tab: complete")


	def choose_in_cache(text, state):
	    """readline completer: return the state-th cached name containing *text*.

	    Case-insensitive, matches anywhere in the name; None once *state*
	    runs past the last candidate.
	    """
	    candidate = [city for city in cache if text.lower() in city.lower()]
	    return candidate[state] if state < len(candidate) else None


	readline.set_completer(choose_in_cache)


	#Data and misc utilities
	def flatten(l):
	    """Return one flat list holding the items of every sub-iterable of *l*."""
	    return [item for sublist in l for item in sublist]


	# Raw strings: "\(" and "\d" are regex escapes, not Python string escapes.
	has_zip_code = re.compile(r".*(\(\d+\))").match
	is_percentage = re.compile(r"(\d+,\d\d %)").match

	# Edges of the income brackets (presumably euros - confirm against the source site).
	bins = [ 0, 10000, 12000, 15000, 20000, 30000, 50000, 100000, 150000 ]
	# (low, high) edge of each bracket laid out flat, for step-shaped curves.
	pbins = flatten(zip(bins[:-1], bins[1:]))
	# Reference series drawn on every figure: one value per bracket.
	Beaumont = [ 22.26, 5.84, 10.67, 17.34, 20.48, 16.17, 6.58, 0.66, ]
	Sceaux = [ 11.37 , 2.78 , 4.20 , 9.53 , 17.15 , 19.47 , 22.24 , 13.25 , ]


	def mplot(y, *a, **kw):
	    """Draw *y* (one value per bracket) as a step curve over ``pbins``.

	    Extra positional and keyword arguments are passed on to ``P.plot``.
	    """
	    return P.plot(pbins, flatten(zip(y, y)), *a, **kw)


	def _ask_line_number(count):
	    """Prompt until the answer is an index in ``range(count)``.

	    Returns the index, or None when the user gives up (end of input, or
	    anything that is not an integer).
	    """
	    while True:
	        try:
	            # raw_input + int rather than Python 2 input(), which eval()s
	            # whatever is typed.
	            line = int(raw_input("Entrez un numero de ligne\n? "))
	        except (EOFError, ValueError):
	            return None
	        # count itself is out of range, and a negative number would
	        # silently index from the end of the list.
	        if 0 <= line < count:
	            return line


	def _percentages(table):
	    """Return, in document order, the float value of each percentage cell.

	    #TODO : crude selection, do it more cleanly (everything that looks
	    like a % is taken).
	    """
	    values = []
	    for td in S("td", table):
	        # td.text is None for a cell without leading text.
	        found = is_percentage(td.text or "")
	        if found:
	            number = found.group(1).replace("%", "").replace(",", ".")
	            values.append(float(number.strip()))
	    return values


	# Main loop
	while True:
	    try:
	        zip_code_or_name = raw_input("""
	Comparateur de revenus fiscaux

	Rentrez un code postal ou un nom de ville (- pour les espaces)
	Tapper une partie du nom ou code postal puis TAB pour voir ce dont le cache dispose. Si un seul choix est possible, la completion sera automatique
	q ou Q pour quitter (attention tous les graphs disparaissent à la sortie)

	? """)
	    except EOFError:
	        # End of input (Ctrl-D): leave through the same path as "q".
	        zip_code_or_name = "q"
	    if isinstance(zip_code_or_name, bytes):
	        # unicode(raw) decodes as ASCII and fails on accented names.
	        zip_code_or_name = zip_code_or_name.decode(sys.stdin.encoding or "utf-8", "replace")
	    if zip_code_or_name.upper() == 'Q':
	        print("Saving history....")
	        save()
	        break

	    ## If not in cache then fetch choices on query and parse with pyquery
	    if zip_code_or_name in cache:
	        city = zip_code_or_name
	        url = None
	    else:
	        choose = S(url="http://www.journaldunet.com/economie/impots/recherche?q=%s" % zip_code_or_name)
	        choose.make_links_absolute()
	        choices = [
	            c for c in S("li > a", choose(".odSquareList"))
	            if c.text and has_zip_code(c.text)
	        ]
	        if not choices:
	            print("Aucun resultat pour cette recherche")
	            continue
	        print("\n".join(["%3d %s" % (i, c.text) for i, c in enumerate(choices)]))
	        line = _ask_line_number(len(choices))
	        if line is None:
	            print("I suppose no choices you like")
	            continue
	        url = S(choices[line]).attr("href")
	        print("Les donnes brutes sont ici :%s" % url)
	        city = choices[line].text

	    if city in cache:
	        print("cache hit : on reprend les données prés sauvées")
	        val = cache[city]
	    else:
	        page = S(url=url)
	        table = S(".odTable tbody", page)
	        # An unmatched selector gives an empty (falsy) selection, it
	        # does not raise: test for it explicitly.
	        if not table:
	            print("web page does not have the table I expect")
	            print("Skipping the rest")
	            continue

	        val = _percentages(table)
	        if len(val) != len(Beaumont):
	            print("pas assez de donnees, cette version sait pas faire")
	            continue
	        cache[city] = val
	        save()

	    # we all hate matplotlib (ugly) but I did not feel like making it pretty
	    P.figure()

	    mplot(Beaumont, 'k--', label="Beaumont (banlieue)")
	    mplot(Sceaux, 'r:', label="Sceaux (ville ideale)")
	    mplot(val, label=city)

	    P.title(u"Repartition des revenus fiscaux par tranches\nImpôts 2014 source http://www.journaldunet.com/")
	    P.legend(loc='upper center', bbox_to_anchor=(0.5, 0.1), fancybox=True, shadow=True)
	    P.show(block=False)
	    save()