Scraping vroomvroom.fr driving-school listings and loading the data into a database (CSV files)
import sys, re | |
from bs4 import BeautifulSoup | |
def fnum(s):
    """Return the index of the first decimal digit in *s*, or None if there is none."""
    for idx, ch in enumerate(s):
        if ch.isdigit():
            return idx
    return None
# Select the correct urlopen for the running interpreter:
# it moved from urllib (Python 2) to urllib.request (Python 3).
if sys.version_info[0] == 3:
    from urllib.request import urlopen
else:
    # Not Python 3 - today, it is most likely to be Python 2
    # But note that this might need an update when Python 4
    # might be around one day
    from urllib import urlopen
# Your code where you can use urlopen
# Prototype run: fetch the Paris listing page, then scrape a single
# school page ("paris-police") and write its data as one CSV row.
with urlopen("https://www.vroomvroom.fr/auto-ecoles/paris/paris") as url:
    s = url.read()
main = BeautifulSoup(s, 'html.parser')
with open('/Users/Jacobo/Documents/python/vroom/bvrooma.csv', 'wb') as f:
    # CSV header row
    f.write(('nom,adresse,cp,ville,taux\n').encode('utf-8'))
    with urlopen('https://www.vroomvroom.fr/auto-ecoles/paris/paris/paris-police') as url:
        # print("https://www.vroomvroom.fr"+auto['href'])
        s = url.read()
        soup = BeautifulSoup(s, 'html.parser')
        # title/name: drop a fixed-length site suffix from the page title
        # (assumes a 24-character tail like " à Paris - Vroomvroom.fr" — TODO confirm)
        f.write((soup.title.text[:-24]).encode('utf-8'))
        f.write(','.encode('utf-8'))
        # address (quoted: street addresses may contain commas)
        f.write('"'.encode('utf-8') + soup.find("span", {"itemprop": "streetAddress"}).text.encode('utf-8') + '"'.encode(
            'utf-8'))
        f.write(','.encode('utf-8'))
        # postal code
        f.write(soup.find("span", {"itemprop": "postalCode"}).text.encode('utf-8'))
        f.write(','.encode('utf-8'))
        # city
        f.write(soup.find("span", {"itemprop": "addressLocality"}).text.encode('utf-8'))
        # the pass rates: one "row-success-rates" div per licence type;
        # stop when a "Code" section shows up again after the first block
        x = 0
        for taux in soup.findAll("div", {"class": "row row-success-rates"}):
            if (x != 0):
                if ('Code' in taux.text):
                    #f.write('\n'.encode('utf-8'))
                    break
            print(taux.text[1:-22])
            x = 1
            s = taux.text
            # locate the first digit of the rate; skip a parenthesised
            # figure such as "(1)" that can precede the real percentage
            m = re.search("\d", s)
            if (m):
                m = m.start()
                if s[m + 1] == ')':
                    a = re.search("\d", s[m + 1:]).start()
                    m +=a+1
                    #print(a,m)
                elif s[m+2]==')':
                    a = re.search("\d", s[m + 2:]).start()
                    m += a + 2
            # NOTE(review): if no digit was found, m is still None here and the
            # slices below raise TypeError — relies on rates always being present
            f.write(','.encode('utf-8'))
            f.write(s[1:m - 1].encode('utf-8'))  # rate label
            f.write(','.encode('utf-8'))
            f.write(s[m:m + 2].encode('utf-8'))  # percentage (first two chars)
            f.write(','.encode('utf-8'))
            # sample size: the number following "sur"
            m = re.search("sur \d", s)
            if m:
                m = m.end() - 1
                f.write(s[m:m + 2].encode('utf-8'))
        f.write('\n'.encode('utf-8'))
import csv | |
def dif(text, header):
    """Return the index in *header* of the column named *text*+'taux' or
    *text*+'sur'; return -1 when neither column exists."""
    wanted = (text + 'taux', text + 'sur')
    for idx, column in enumerate(header):
        if column in wanted:
            return idx
    return -1
def ajout(ligne, rows):
    """Append one parsed CSV line to *rows*, aligning its rate values with
    the dynamic header kept in rows[0].

    Columns 4+ of the header come in '<label>taux'/'<label>sur' pairs that
    are discovered on the fly as new labels appear in the data.
    Mutates both *ligne* (consumed) and *rows* (header grown, row appended).
    """
    # first four fields are fixed: nom, adresse, cp, ville
    tab = []
    for i in range(4):
        tab.append(ligne.pop(0))
    while (ligne != []):
        #print(ligne)
        # position of this line's next label within the dynamic header part
        a = dif(ligne[0], rows[0][4:])
        if a == 0:
            # label matches the next expected column pair: drop the label,
            # its two values are consumed at the bottom of the loop
            ligne.pop(0)
        elif a > 0:
            # label sits further right in the header: pad two blanks per
            # skipped column pair.
            # NOTE(review): the label itself is NOT popped in this branch, so
            # the pair-append below picks up the label as a value — verify intended
            print(tab)
            for i in range(a*2):
                tab.append(' ')
            print(tab)
        else:
            # unknown label: extend the shared header with a new pair
            x=ligne.pop(0)
            #rows[0].append(x)
            rows[0].append(x+'taux')
            rows[0].append(x + 'sur')
        if(ligne!=[]):
            # consume the (taux, sur) value pair for this label
            for i in range(2):
                tab.append(ligne.pop(0))
    rows.append(tab)
# header[4:] holds the dynamically discovered rate-column names.
# Re-shape vroomf.csv (variable rate columns per row) into trif.csv,
# where every row is aligned against one shared header.
with open('/Users/Jacobo/Documents/python/vroom/vroomf.csv','rt',encoding='utf8') as f:
    lignes=csv.reader(f)
    rows=[['nom','adresse','cp', 'ville']]
    for ligne in lignes:
        ajout(ligne, rows)
# newline='' is required when handing a text-mode file to csv.writer;
# without it, platforms that translate '\n' emit doubled line endings.
with open('/Users/Jacobo/Documents/python/vroom/trif.csv','wt',encoding='utf8',newline='') as f:
    writer = csv.writer(f)
    rows.pop(1)  # drop the row ajout() built from vroomf.csv's own header line
    writer.writerows(rows)
import sys, re | |
from bs4 import BeautifulSoup | |
def fnum(s):
    """Index of the first decimal digit in *s*; None when *s* contains no digit."""
    return next((i for i, c in enumerate(s) if c.isdigit()), None)
# urlopen lives in urllib.request on Python 3 but plain urllib on Python 2;
# import whichever matches the running interpreter.
if sys.version_info[0] == 3:
    from urllib.request import urlopen
else:
    # Not Python 3 - today, it is most likely to be Python 2
    # But note that this might need an update when Python 4
    # might be around one day
    from urllib import urlopen
# Your code where you can use urlopen
# Main crawl: for every school linked from the Paris listing page, scrape
# name/address/postal-code/city and the pass rates, writing one CSV row
# per school into vroomf.csv.
with urlopen("https://www.vroomvroom.fr/auto-ecoles/paris/paris") as url:
    s = url.read()
main = BeautifulSoup(s, 'html.parser')
with open('/Users/Jacobo/Documents/python/vroom/vroomf.csv', 'wb') as f:
    # CSV header row (rate columns are discovered later, see ajout)
    f.write(('nom,adresse,cp,ville,taux\n').encode('utf-8'))
    for auto in main.findAll("a", {"class": "school-listing-title"}):
        with urlopen("https://www.vroomvroom.fr" + auto['href']) as url:
            print("https://www.vroomvroom.fr" + auto['href'])
            s = url.read()
            soup = BeautifulSoup(s, 'html.parser')
            # title/name: drop a fixed-length site suffix from the page title
            # (assumes a 24-character tail like " à Paris - Vroomvroom.fr" — TODO confirm)
            f.write((soup.title.text[:-24]).encode('utf-8'))
            f.write(','.encode('utf-8'))
            # address (quoted: street addresses may contain commas)
            f.write(
                '"'.encode('utf-8') + soup.find("span", {"itemprop": "streetAddress"}).text.encode('utf-8') + '"'.encode(
                    'utf-8'))
            f.write(','.encode('utf-8'))
            # postal code
            f.write(soup.find("span", {"itemprop": "postalCode"}).text.encode('utf-8'))
            f.write(','.encode('utf-8'))
            # city
            f.write(soup.find("span", {"itemprop": "addressLocality"}).text.encode('utf-8'))
            # the pass rates: one "row-success-rates" div per licence type;
            # stop when a "Code" section shows up again after the first block
            x = 0
            for taux in soup.findAll("div", {"class": "row row-success-rates"}):
                if (x != 0):
                    if ('Code' in taux.text):
                        # f.write('\n'.encode('utf-8'))
                        break
                # print(taux.text[1:-22])
                x = 1
                s = taux.text
                # locate the first digit of the rate; skip a parenthesised
                # figure such as "(1)" that can precede the real percentage
                m = re.search("\d", s)
                if (m):
                    m = m.start()
                    if s[m + 1] == ')':
                        a = re.search("\d", s[m + 1:]).start()
                        m += a + 1
                    elif s[m + 2] == ')':
                        a = re.search("\d", s[m + 2:]).start()
                        m += a + 2
                # NOTE(review): if no digit was found, m is still None here and
                # the slices below raise TypeError — relies on rates being present
                f.write(','.encode('utf-8'))
                f.write(s[1:m - 1].encode('utf-8'))  # rate label
                f.write(','.encode('utf-8'))
                f.write(s[m:m + 2].encode('utf-8'))  # percentage (first two chars)
                f.write(','.encode('utf-8'))
                # sample size: the number following "sur"
                m = re.search("sur \d", s)
                if m:
                    m = m.end() - 1
                    f.write(s[m:m + 2].encode('utf-8'))
            f.write('\n'.encode('utf-8'))
# print(soup.prettify()) | |
# soup.title.text : titre 'CER LEGENDRE à Paris - Vroomvroom.fr' | |
# soup.find("span", {"itemprop": "streetAddress"}).text | |
# soup.find("span", {"itemprop": "postalCode"}).text | |
# soup.find("span", {"itemprop": "addressLocality"}).text | |
# soup.findAll("div", {"class": "row row-success-rates"})[i].text | |
# the idea here is to watch for a new occurrence of "\nCode de la route\n" to know when to stop
# on: https://www.vroomvroom.fr/auto-ecoles/paris/paris | |
# soup.findAll("a", {"class": "school-listing-title"})[0] | |
# soup.findAll("a", {"class": "school-listing-title"})[0]['href'] | |
# print(s) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment