Skip to content

Instantly share code, notes, and snippets.

@lhoupert
Created February 27, 2023 14:57
Show Gist options
  • Save lhoupert/8abce8cefb35a8738f1d62b08c11bdfa to your computer and use it in GitHub Desktop.
Save lhoupert/8abce8cefb35a8738f1d62b08c11bdfa to your computer and use it in GitHub Desktop.
Short script to test web scraping on the Telepac Website
# Script to test web scraping on Telepac website.
# Adapted from blog article (https://www.scrapingbee.com/blog/selenium-python/)
# L Houpert Dec. 2020
import os
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
def latest_download_file(dl_path):
    """Return the name of the most recently modified entry in *dl_path*.

    Args:
        dl_path: Directory to inspect; may be absolute or relative to the
            current working directory.

    Returns:
        The bare entry name (not a full path) whose modification time is
        the newest among the entries of *dl_path*.

    Raises:
        IndexError: If *dl_path* contains no entries.
        OSError: If *dl_path* cannot be listed, or an entry disappears
            between the listing and the modification-time lookup.
    """
    names = os.listdir(dl_path)
    if not names:
        raise IndexError("no files found in {!r}".format(dl_path))

    def _mtime(name):
        # Join with the directory instead of chdir-ing into it: relative
        # paths keep working and the process-wide cwd is never modified,
        # even when a lookup raises.
        return os.path.getmtime(os.path.join(dl_path, name))

    return sorted(names, key=_mtime)[-1]
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
current_path = os.path.abspath(os.getcwd())
DRIVER_PATH = '../chromedriver'
# Value typed into the postal-code search field.
# NOTE(review): the '*' characters are presumably wildcards understood by the
# Telepac search form -- confirm against the site.
CODE_POSTAL = '578**'
# Directory Chrome is told to download into; the final CSV is moved one level
# up (into csv/) once the download has completed.
CSV_LOC = current_path + "/csv/dl/"
# Upper bound, in seconds, on how long to wait for the CSV download.
DOWNLOAD_TIMEOUT = 60

# Create the download directory up front so that listing it while polling
# cannot fail on a missing path.
os.makedirs(CSV_LOC, exist_ok=True)

# ---------------------------------------------------------------------------
# Browser set-up
# ---------------------------------------------------------------------------
options = Options()
# Passing the flag directly has the same effect as `options.headless = True`
# and does not rely on that property being available.
options.add_argument("--headless")
options.add_argument("--window-size=1400,800")
prefs = {"download.default_directory": CSV_LOC}
options.add_experimental_option("prefs", prefs)

# NOTE(review): `executable_path` is the Selenium 3 / early Selenium 4 way of
# locating chromedriver; recent Selenium 4 releases expect a `Service` object
# instead -- adjust if the installed Selenium rejects this keyword.
driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)

# Everything that uses the browser lives inside try/finally so the Chrome
# process is always shut down, even when a lookup or the download fails.
try:
    driver.get("https://www3.telepac.agriculture.gouv.fr/telepac/tbp/feader/afficher.action")

    # Fill in the postal code and submit the search form.
    driver.find_element(
        By.XPATH, "//input[@id='rechercheExploitant_codePostal']"
    ).send_keys(CODE_POSTAL)
    driver.find_element(By.XPATH, "//a[@id='submit.button.1']").click()

    # The CSV export link is only looked up once; its absence is treated as
    # "the search returned nothing".
    # NOTE(review): no explicit wait is used here, so a slow page load could
    # also end up in the 'No entries' branch -- consider a WebDriverWait.
    try:
        download_link = driver.find_element(By.XPATH, "//a[@id='rechercher_']")
    except NoSuchElementException:
        download_link = None
        print('No entries')
    else:
        print('Successfull request')

    if download_link is not None:
        # Remember what is already in the download directory so a stale file
        # from a previous run is never mistaken for the new download.
        already_there = set(os.listdir(CSV_LOC))
        download_link.click()

        # Poll until the newest file is a new one that no longer carries
        # Chrome's in-progress "crdownload" marker.
        deadline = time.monotonic() + DOWNLOAD_TIMEOUT
        newest_file = None
        while newest_file is None:
            if time.monotonic() > deadline:
                raise TimeoutError(
                    "CSV download did not complete within "
                    + str(DOWNLOAD_TIMEOUT) + " seconds"
                )
            time.sleep(0.5)
            try:
                candidate = latest_download_file(CSV_LOC)
            except (IndexError, OSError):
                # Directory still empty, or a file was renamed between the
                # directory listing and the mtime lookup: just poll again.
                continue
            print(candidate + '\n')
            if "crdownload" not in candidate and candidate not in already_there:
                newest_file = candidate
        print("file downloaded")

        # Move the finished download next to the dl/ directory under a
        # descriptive name.
        # TODO: optionally append a timestamp so repeated runs do not
        # overwrite each other.
        newcsvname = "Telepac-2018-" + CODE_POSTAL + '.csv'
        os.rename(
            os.path.join(CSV_LOC, newest_file),
            os.path.join(current_path, 'csv', newcsvname),
        )
        print("file renamed")

    # Capture the page before and after scrolling down by 1000 pixels.
    driver.save_screenshot('screenshot1.png')
    driver.execute_script("window.scrollBy(0,1000);")
    driver.save_screenshot('screenshot2.png')
finally:
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment