# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @siayi
# Created October 28, 2021 05:42
# Show Gist options
#   • Save siayi/10ce946199ac4c168c646d863dbd3a83 to your computer and use it in GitHub Desktop.
# Save siayi/10ce946199ac4c168c646d863dbd3a83 to your computer and use it in GitHub Desktop.
# Copyright (c) 2021. THIS SOURCE CODE BELONGS TO WB&C/SMARTEEZ INC. ANY OUTSIDER REPLICATION OF IT WILL BE LEGALLY PROSECUTED.
# _______ ________ ________ _______ ________ ________
# |\ ___ \ |\ ___ \|\ ___ \|\ ___ \ |\ ____\|\ __ \
# \ \ __/|\ \ \\ \ \ \ \_|\ \ \ __/|\ \ \___|\ \ \|\ \
# \ \ \_|/_\ \ \\ \ \ \ \ \\ \ \ \_|/_\ \_____ \ \ __ \
# \ \ \_|\ \ \ \\ \ \ \ \_\\ \ \ \_|\ \|____|\ \ \ \ \ \
# \ \_______\ \__\\ \__\ \_______\ \_______\____\_\ \ \__\ \__\
# \|_______|\|__| \|__|\|_______|\|_______|\_________\|__|\|__|
# \|_________|
#
#
# \ ___ ___ _______ ________ _________ _______
# |\ / /|\ ___ \ |\ ___ \|\___ ___\\ ___ \
# \ \ \ / / | \ __/|\ \ \\ \ \|___ \ \_\ \ __/|
# \ \ \/ / / \ \ \_|/_\ \ \\ \ \ \ \ \ \ \ \_|/__
# \ \ / / \ \ \_|\ \ \ \\ \ \ \ \ \ \ \ \_|\ \
# \ \__/ / \ \_______\ \__\\ \__\ \ \__\ \ \_______\
# \|__|/ \|_______|\|__| \|__| \|__| \|_______|
#
#
#
# _______ ________ ___ ___ ________ ________ _______
# |\ ___ \ |\ ___ \ |\ \ |\ \|\ ____\|\ ___ \|\ ___ \
# \ \ __/|\ \ \\ \ \ \ \ \ \ \ \ \ \___|\ \ \\ \ \ \ __/|
# \ \ \_|/_\ \ \\ \ \ \ \ \ \ \ \ \ \ __\ \ \\ \ \ \ \_|/__
# \ \ \_|\ \ \ \\ \ \ \ \ \____\ \ \ \ \|\ \ \ \\ \ \ \ \_|\ \
# \ \_______\ \__\\ \__\ \ \_______\ \__\ \_______\ \__\\ \__\ \_______\
# \|_______|\|__| \|__| \|_______|\|__|\|_______|\|__| \|__|\|_______|
import csv
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
def _select_city_option(select, postal_code):
    """Click every option of *select* whose ``data-zipcode`` equals *postal_code*.

    An empty *postal_code* selects nothing.  For '05000' the option must also
    carry ``value == '1822'`` (presumably because several cities share that
    postal code -- TODO confirm against the site).
    """
    if postal_code == '':
        return
    for option in select.options:
        if option.get_attribute("data-zipcode") != postal_code:
            continue
        if postal_code == '05000' and option.get_attribute('value') != '1822':
            continue
        option.click()


def _scrape_gas_offers(selenium, postal_code, car):
    """Walk the comparator form in an open browser and return the listed offers.

    Args:
        selenium: an already started WebDriver instance.
        postal_code: postal code typed into the ``profil_zipcode`` field.
        car: value typed into the ``gas_consumption_consumption`` field
            (presumably the yearly gas consumption -- TODO confirm unit).

    Returns:
        A list of dicts, one per offer element found on the result page, with
        the keys ``postal_code``, ``car``, ``title``, ``price``, ``image``,
        ``biogaz_percentage`` and ``competitor_url``.
    """
    wait = WebDriverWait(selenium, 20)

    selenium.get('https://comparateur-offres.energie-info.fr/')
    selenium.find_element(By.XPATH, '/html/body/header/div/div[4]/label[2]/span').click()
    # NOTE(review): looks like a modal/consent button that appears with a delay.
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '/html/body/div/div/div/footer/div/button[1]'))).click()
    sleep(3)

    # Profile step: postal code, then the matching city in the drop-down.
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="profil_zipcode"]'))).send_keys(postal_code)
    city_select = Select(wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="profil_cityId"]'))))
    _select_city_option(city_select, postal_code)
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '/html/body/main/div/div/div[2]/div[2]/form/div[3]/div[2]/div[1]/label[2]/span'))).click()
    selenium.find_element(
        By.XPATH, '/html/body/main/div/div/div[2]/div[2]/form/div[4]/button').click()
    sleep(1)

    # Gas consumption step.
    Select(selenium.find_element(
        By.XPATH, '//*[@id="gas_consumption_consumptionType"]')).select_by_value("1")
    consumption_input = selenium.find_element(By.XPATH, '//*[@id="gas_consumption_consumption"]')
    consumption_input.send_keys(str(car))
    consumption_input.send_keys(Keys.TAB)
    selenium.find_element(
        By.XPATH, '/html/body/main/div/div/div[2]/div/form/div[7]/button').click()

    # Search configuration step: pick sort option '2' and submit.
    Select(selenium.find_element(
        By.XPATH, '//*[@id="search_configuration_sortModel_sortBy"]')).select_by_value('2')
    selenium.find_element(
        By.XPATH, '/html/body/main/div/div/div[2]/div[1]/form/div[7]/button').click()
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '/html/body/main/div/div/div[2]/div[2]/div/footer/div/button'))).click()

    offers = []
    offer_elements = selenium.find_elements(
        By.XPATH, '//*[@id="anchor"]/div/div[2]/div[1]/div[3]/div[@class="offre offer"]')
    for offer in offer_elements:
        price = offer.find_element(By.XPATH, './header/div/span/span').text
        title = offer.find_element(By.XPATH, './main/article/div[3]/h4').text
        image = offer.find_element(
            By.XPATH, './main/article/div[2]/a/figure/img').get_attribute('src')
        competitor_url = offer.find_element(
            By.XPATH, './main/article/div[2]/a').get_attribute('href')
        biogaz_percentage = offer.find_element(
            By.XPATH, './main/article/div[3]/div/div[2]/span[2]').text.replace('%', '').strip()
        offers.append({
            'postal_code': postal_code,
            'car': car,
            'title': title,
            'price': price,
            'image': image,
            'biogaz_percentage': biogaz_percentage,
            'competitor_url': competitor_url,
        })
        print(title)
        print(price)
        print('-----')
    return offers


def go():
    """Scrape offers for every postal code / consumption pair into ``people.csv``.

    A fresh Firefox session is started for each combination and is always shut
    down afterwards, even when a step of the form walk-through raises.  All
    collected rows are written to ``people.csv`` once every combination has
    been processed; when nothing was scraped the file contains only the header
    row.

    Raises:
        Any exception raised by Selenium while driving the page is propagated
        unchanged (the browser is quit first).
    """
    postal_codes = ['69001', '75000', '06000', '05000', '15000', '63110']
    consumptions = [2000, 5000, 15000, 25000, 50000, 80000, 100000, 150000, 300000]
    # Fixed column order so the header can be written even with zero rows.
    fieldnames = ['postal_code', 'car', 'title', 'price', 'image',
                  'biogaz_percentage', 'competitor_url']

    output = []
    for postal_code in tqdm(postal_codes):
        print('pay attention 1')
        for car in tqdm(consumptions):
            firefox_options = Options()
            # Uncomment to run without a visible browser window:
            # firefox_options.add_argument('--headless')
            selenium = webdriver.Firefox(
                executable_path="/home/zerocold/Downloads/geckodriver-v0.26.0-linux64/geckodriver",
                options=firefox_options)
            try:
                output.extend(_scrape_gas_offers(selenium, postal_code, car))
            finally:
                # quit() ends the whole driver session, not just the window.
                selenium.quit()

    with open('people.csv', 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(output)
def go_2():
    """Run one hard-coded comparator query (postal code 80100, consumption 82.5).

    Starts a Firefox session, walks the comparator form, prints the title and
    price of every offer found on the result page and shuts the browser down
    again, even if a step raises.

    Returns:
        A list of dicts, one per offer, with the keys ``title``, ``price``,
        ``image``, ``biogaz_percentage`` and ``competitor_url``.  (Earlier
        versions built this list but returned ``None``.)

    Raises:
        Any exception raised by Selenium while driving the page is propagated
        unchanged (the browser is quit first).
    """
    firefox_options = Options()
    # Uncomment to run without a visible browser window:
    # firefox_options.add_argument('--headless')
    selenium = webdriver.Firefox(
        executable_path="/home/zerocold/Downloads/geckodriver-v0.26.0-linux64/geckodriver",
        options=firefox_options)
    try:
        wait = WebDriverWait(selenium, 20)

        selenium.get('https://comparateur-offres.energie-info.fr/')
        selenium.find_element(By.XPATH, '/html/body/header/div/div[4]/label[2]/span').click()
        # NOTE(review): looks like a modal/consent button that appears with a delay.
        wait.until(EC.element_to_be_clickable(
            (By.XPATH, '/html/body/div/div/div/footer/div/button[1]'))).click()

        # Profile step: postal code, then every city option tagged with it.
        selenium.find_element(By.XPATH, '//*[@id="profil_zipcode"]').send_keys('80100')
        city_select = Select(wait.until(EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="profil_cityId"]'))))
        for option in city_select.options:
            if option.get_attribute("data-zipcode") == '80100':
                option.click()
        wait.until(EC.element_to_be_clickable(
            (By.XPATH, '/html/body/main/div/div/div[2]/div[2]/form/div[3]/div[2]/div[1]/label[2]/span'))).click()
        selenium.find_element(
            By.XPATH, '/html/body/main/div/div/div[2]/div[2]/form/div[4]/button').click()
        sleep(1)

        # Gas consumption step.
        Select(selenium.find_element(
            By.XPATH, '//*[@id="gas_consumption_consumptionType"]')).select_by_value("1")
        consumption_input = selenium.find_element(By.XPATH, '//*[@id="gas_consumption_consumption"]')
        consumption_input.send_keys('82.5')
        consumption_input.send_keys(Keys.TAB)
        selenium.find_element(
            By.XPATH, '/html/body/main/div/div/div[2]/div/form/div[7]/button').click()

        # Search configuration step: pick sort option '2' and submit.
        Select(selenium.find_element(
            By.XPATH, '//*[@id="search_configuration_sortModel_sortBy"]')).select_by_value('2')
        selenium.find_element(
            By.XPATH, '/html/body/main/div/div/div[2]/div[1]/form/div[7]/button').click()
        wait.until(EC.element_to_be_clickable(
            (By.XPATH, '/html/body/main/div/div/div[2]/div[2]/div/footer/div/button'))).click()

        output = []
        offer_elements = selenium.find_elements(
            By.XPATH, '//*[@id="anchor"]/div/div[2]/div[1]/div[3]/div[@class="offre offer"]')
        for offer in offer_elements:
            price = offer.find_element(By.XPATH, './header/div/span/span').text
            title = offer.find_element(By.XPATH, './main/article/div[3]/h4').text
            image = offer.find_element(
                By.XPATH, './main/article/div[2]/a/figure/img').get_attribute('src')
            competitor_url = offer.find_element(
                By.XPATH, './main/article/div[2]/a').get_attribute('href')
            biogaz_percentage = offer.find_element(
                By.XPATH, './main/article/div[3]/div/div[2]/span[2]').text.replace('%', '').strip()
            output.append({
                'title': title,
                'price': price,
                'image': image,
                'biogaz_percentage': biogaz_percentage,
                'competitor_url': competitor_url,
            })
            print(title)
            print(price)
            print('-----')
        return output
    finally:
        # The original never closed the browser; quit() ends the driver session.
        selenium.quit()
# Script entry point: run the full scrape only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    go()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment