Created
October 28, 2021 05:42
-
-
Save siayi/10ce946199ac4c168c646d863dbd3a83 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2021. THIS SOURCE CODE BELONGS TO WB&C/SMARTEEZ INC. ANY OUTSIDER REPLICATION OF IT WILL BE LEGALLY PROSECUTED.
# _______ ________ ________ _______ ________ ________ | |
# |\ ___ \ |\ ___ \|\ ___ \|\ ___ \ |\ ____\|\ __ \ | |
# \ \ __/|\ \ \\ \ \ \ \_|\ \ \ __/|\ \ \___|\ \ \|\ \ | |
# \ \ \_|/_\ \ \\ \ \ \ \ \\ \ \ \_|/_\ \_____ \ \ __ \ | |
# \ \ \_|\ \ \ \\ \ \ \ \_\\ \ \ \_|\ \|____|\ \ \ \ \ \ | |
# \ \_______\ \__\\ \__\ \_______\ \_______\____\_\ \ \__\ \__\ | |
# \|_______|\|__| \|__|\|_______|\|_______|\_________\|__|\|__| | |
# \|_________| | |
# | |
# | |
# \ ___ ___ _______ ________ _________ _______ | |
# |\ / /|\ ___ \ |\ ___ \|\___ ___\\ ___ \ | |
# \ \ \ / / | \ __/|\ \ \\ \ \|___ \ \_\ \ __/| | |
# \ \ \/ / / \ \ \_|/_\ \ \\ \ \ \ \ \ \ \ \_|/__ | |
# \ \ / / \ \ \_|\ \ \ \\ \ \ \ \ \ \ \ \_|\ \ | |
# \ \__/ / \ \_______\ \__\\ \__\ \ \__\ \ \_______\ | |
# \|__|/ \|_______|\|__| \|__| \|__| \|_______| | |
# | |
# | |
# | |
# _______ ________ ___ ___ ________ ________ _______ | |
# |\ ___ \ |\ ___ \ |\ \ |\ \|\ ____\|\ ___ \|\ ___ \ | |
# \ \ __/|\ \ \\ \ \ \ \ \ \ \ \ \ \___|\ \ \\ \ \ \ __/| | |
# \ \ \_|/_\ \ \\ \ \ \ \ \ \ \ \ \ \ __\ \ \\ \ \ \ \_|/__ | |
# \ \ \_|\ \ \ \\ \ \ \ \ \____\ \ \ \ \|\ \ \ \\ \ \ \ \_|\ \ | |
# \ \_______\ \__\\ \__\ \ \_______\ \__\ \_______\ \__\\ \__\ \_______\ | |
# \|_______|\|__| \|__| \|_______|\|__|\|_______|\|__| \|__|\|_______| | |
import csv | |
from time import sleep | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.firefox.options import Options | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.support.select import Select | |
from selenium.webdriver.support.ui import WebDriverWait | |
from tqdm import tqdm | |
def go(geckodriver_path="/home/zerocold/Downloads/geckodriver-v0.26.0-linux64/geckodriver"):
    """Scrape offers from comparateur-offres.energie-info.fr into ``people.csv``.

    For every combination of the hard-coded postal codes and consumption
    values, a fresh Firefox session fills in the comparator form (fields
    ``profil_zipcode``, ``profil_cityId``, ``gas_consumption_consumptionType``,
    ``gas_consumption_consumption`` and ``search_configuration_sortModel_sortBy``)
    and collects one row per ``div.offre.offer`` element on the result page.

    Args:
        geckodriver_path: Filesystem path of the geckodriver binary passed to
            ``webdriver.Firefox``. Defaults to the path used by the original
            script.

    Raises:
        selenium.common.exceptions.WebDriverException: (or a subclass such as
            ``TimeoutException`` / ``NoSuchElementException``) when a page
            element cannot be found or does not become clickable in time.
            The browser is still shut down and no CSV is written.

    NOTE(review): the absolute XPaths below are tied to the page layout at the
    time of writing; the purpose of each click is inferred from the element
    ids and should be confirmed against the live page.
    NOTE(review): ``executable_path=`` is the Selenium 3 way of locating the
    driver and is kept so the script keeps working with the installed version;
    newer Selenium releases expect a ``Service`` object instead.
    """
    # Column order of the CSV. Declared explicitly so that a run which finds
    # no offers still produces a valid (header-only) file instead of failing
    # on ``output[0]``.
    fieldnames = ['postal_code', 'car', 'title', 'price', 'image',
                  'biogaz_percentage', 'competitor_url']
    output = []
    for postal_code in tqdm(['69001', '75000', '06000', '05000', '15000', '63110']):
        print('pay attention 1')
        # ``car`` is typed into the consumption field; presumably the yearly
        # gas consumption to simulate -- confirm the unit against the site.
        for car in tqdm([2000, 5000, 15000, 25000, 50000, 80000, 100000, 150000, 300000]):
            firefox_options = Options()
            # firefox_options.add_argument('--headless')
            driver = webdriver.Firefox(executable_path=geckodriver_path,
                                       options=firefox_options)
            # try/finally guarantees the browser and geckodriver process are
            # released even when a wait times out or an element is missing.
            try:
                driver.get('https://comparateur-offres.energie-info.fr/')
                driver.find_element(
                    By.XPATH, '/html/body/header/div/div[4]/label[2]/span').click()
                WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
                    (By.XPATH, '/html/body/div/div/div/footer/div/button[1]'))).click()
                # Fixed pause kept from the original script; presumably lets
                # the page settle after the previous click.
                sleep(3)

                # --- profile step: zip code and city ---
                WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="profil_zipcode"]'))).send_keys(postal_code)
                city_select = Select(WebDriverWait(driver, 20).until(
                    EC.element_to_be_clickable((By.XPATH, '//*[@id="profil_cityId"]'))))
                for option in city_select.options:
                    # An empty postal code never selects anything.
                    if not postal_code:
                        continue
                    if option.get_attribute("data-zipcode") != postal_code:
                        continue
                    # For 05000 only the option with value '1822' is accepted;
                    # presumably several cities share that zip code -- confirm.
                    if postal_code == '05000' and option.get_attribute('value') != '1822':
                        continue
                    option.click()
                WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
                    (By.XPATH,
                     '/html/body/main/div/div/div[2]/div[2]/form/div[3]/div[2]/div[1]/label[2]/span'))).click()
                driver.find_element(
                    By.XPATH, '/html/body/main/div/div/div[2]/div[2]/form/div[4]/button').click()
                sleep(1)

                # --- consumption step ---
                Select(driver.find_element(
                    By.XPATH, '//*[@id="gas_consumption_consumptionType"]')).select_by_value("1")
                consumption_input = driver.find_element(
                    By.XPATH, '//*[@id="gas_consumption_consumption"]')
                consumption_input.send_keys(str(car))
                # TAB moves focus out of the field after typing.
                consumption_input.send_keys(Keys.TAB)
                driver.find_element(
                    By.XPATH, '/html/body/main/div/div/div[2]/div/form/div[7]/button').click()

                # --- search configuration step ---
                Select(driver.find_element(
                    By.XPATH, '//*[@id="search_configuration_sortModel_sortBy"]')).select_by_value('2')
                driver.find_element(
                    By.XPATH, '/html/body/main/div/div/div[2]/div[1]/form/div[7]/button').click()
                WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
                    (By.XPATH, '/html/body/main/div/div/div[2]/div[2]/div/footer/div/button'))).click()

                # --- result page: one row per offer card ---
                offers = driver.find_elements(
                    By.XPATH,
                    '//*[@id="anchor"]/div/div[2]/div[1]/div[3]/div[@class="offre offer"]')
                for offer in offers:
                    price = offer.find_element(By.XPATH, './header/div/span/span').text
                    title = offer.find_element(By.XPATH, './main/article/div[3]/h4').text
                    image = offer.find_element(
                        By.XPATH, './main/article/div[2]/a/figure/img').get_attribute('src')
                    competitor_url = offer.find_element(
                        By.XPATH, './main/article/div[2]/a').get_attribute('href')
                    biogaz_percentage = offer.find_element(
                        By.XPATH, './main/article/div[3]/div/div[2]/span[2]'
                    ).text.replace('%', '').strip()
                    output.append({
                        'postal_code': postal_code,
                        'car': car,
                        'title': title,
                        'price': price,
                        'image': image,
                        'biogaz_percentage': biogaz_percentage,
                        'competitor_url': competitor_url,
                    })
                    print(title)
                    print(price)
                    print('-----')
            finally:
                # quit() ends the whole WebDriver session, not just the window.
                driver.quit()

    with open('people.csv', 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(output)
def go_2(geckodriver_path="/home/zerocold/Downloads/geckodriver-v0.26.0-linux64/geckodriver"):
    """Run a single comparator query (zip code 80100, consumption '82.5').

    Opens Firefox, fills in the comparator form on
    comparateur-offres.energie-info.fr, prints the title and price of every
    ``div.offre.offer`` element on the result page and returns the collected
    rows.

    Args:
        geckodriver_path: Filesystem path of the geckodriver binary passed to
            ``webdriver.Firefox``. Defaults to the path used by the original
            script.

    Returns:
        A list of dicts with the keys ``title``, ``price``, ``image``,
        ``biogaz_percentage`` and ``competitor_url``; empty when the result
        page contains no matching offer element.

    Raises:
        selenium.common.exceptions.WebDriverException: (or a subclass) when a
            page element cannot be found or does not become clickable in time.
            The browser is still shut down.

    NOTE(review): the absolute XPaths below are tied to the page layout at the
    time of writing; the purpose of each click is inferred from the element
    ids and should be confirmed against the live page.
    """
    firefox_options = Options()
    # firefox_options.add_argument('--headless')
    driver = webdriver.Firefox(executable_path=geckodriver_path,
                               options=firefox_options)
    output = []
    # try/finally guarantees the browser and geckodriver process are released
    # on success as well as on failure.
    try:
        driver.get('https://comparateur-offres.energie-info.fr/')
        driver.find_element(
            By.XPATH, '/html/body/header/div/div[4]/label[2]/span').click()
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
            (By.XPATH, '/html/body/div/div/div/footer/div/button[1]'))).click()

        # --- profile step: zip code and city ---
        # Wait for the field to be usable instead of reading it immediately.
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="profil_zipcode"]'))).send_keys('80100')
        city_select = Select(WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="profil_cityId"]'))))
        for option in city_select.options:
            if option.get_attribute("data-zipcode") == '80100':
                option.click()
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
            (By.XPATH,
             '/html/body/main/div/div/div[2]/div[2]/form/div[3]/div[2]/div[1]/label[2]/span'))).click()
        driver.find_element(
            By.XPATH, '/html/body/main/div/div/div[2]/div[2]/form/div[4]/button').click()
        sleep(1)

        # --- consumption step ---
        Select(driver.find_element(
            By.XPATH, '//*[@id="gas_consumption_consumptionType"]')).select_by_value("1")
        consumption_input = driver.find_element(
            By.XPATH, '//*[@id="gas_consumption_consumption"]')
        consumption_input.send_keys('82.5')
        # TAB moves focus out of the field after typing.
        consumption_input.send_keys(Keys.TAB)
        driver.find_element(
            By.XPATH, '/html/body/main/div/div/div[2]/div/form/div[7]/button').click()

        # --- search configuration step ---
        Select(driver.find_element(
            By.XPATH, '//*[@id="search_configuration_sortModel_sortBy"]')).select_by_value('2')
        driver.find_element(
            By.XPATH, '/html/body/main/div/div/div[2]/div[1]/form/div[7]/button').click()
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable(
            (By.XPATH, '/html/body/main/div/div/div[2]/div[2]/div/footer/div/button'))).click()

        # --- result page: one row per offer card ---
        offers = driver.find_elements(
            By.XPATH,
            '//*[@id="anchor"]/div/div[2]/div[1]/div[3]/div[@class="offre offer"]')
        for offer in offers:
            price = offer.find_element(By.XPATH, './header/div/span/span').text
            title = offer.find_element(By.XPATH, './main/article/div[3]/h4').text
            image = offer.find_element(
                By.XPATH, './main/article/div[2]/a/figure/img').get_attribute('src')
            competitor_url = offer.find_element(
                By.XPATH, './main/article/div[2]/a').get_attribute('href')
            biogaz_percentage = offer.find_element(
                By.XPATH, './main/article/div[3]/div/div[2]/span[2]'
            ).text.replace('%', '').strip()
            output.append({
                'title': title,
                'price': price,
                'image': image,
                'biogaz_percentage': biogaz_percentage,
                'competitor_url': competitor_url,
            })
            print(title)
            print(price)
            print('-----')
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        driver.quit()
    return output
# Script entry point: call go() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    go()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment