# Gist by @freestok, created September 4, 2020.
# Source: https://gist.github.com/freestok/ac4d43f77fca9ec4e88eaa865518c70b
import os, time, re
from os.path import join
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import ElementClickInterceptedException
from webdriver_manager.chrome import ChromeDriverManager
def click_wait(driver, name):
    """Click the element with DOM id *name*, retrying once per second.

    Selenium raises while the page is still loading, the element is stale,
    or another element covers it, so keep retrying until the click lands.
    While waiting, if the page reports that the search returned nothing,
    the target element (e.g. the export button) will never appear, so
    bail out instead of spinning forever.

    Parameters
    ----------
    driver : selenium WebDriver
        Active browser session.
    name : str
        DOM id of the element to click.

    Returns
    -------
    'skip' when the "no results" banner is detected, otherwise None
    after a successful click.
    """
    while True:
        try:
            driver.find_element_by_id(name).click()
            return None
        except (StaleElementReferenceException, NoSuchElementException,
                ElementClickInterceptedException):
            time.sleep(1)
            src = driver.page_source
            # No results -> the button we are waiting for never renders,
            # so retrying forever would hang.  Tell the caller to skip.
            if re.search(r'Your search returned no results', src):
                return 'skip'
def wait_for_download(start, downloads=r'C:\Users\kfreestone\Downloads'):
    """Block until a .csv modified after *start* appears in *downloads*.

    Parameters
    ----------
    start : float
        Epoch timestamp; only files modified strictly after this count.
    downloads : str, optional
        Directory to watch.  Defaults to the original hard-coded
        Downloads folder for backward compatibility; pass your own
        path to make this portable.

    Returns
    -------
    str
        Full path of the first matching CSV found.

    Note: polls once per second and loops forever if no matching file
    ever appears — callers rely on the browser download completing.
    """
    while True:
        for fname in os.listdir(downloads):
            path = join(downloads, fname)
            if fname.endswith('.csv') and os.path.getmtime(path) > start:
                return path
        time.sleep(1)
def scrape_accela(start, end):
    """Download an Accela enforcement-record CSV for a date range.

    Drives the Grand Rapids CitizenAccess search form, exports the
    result set, and waits for the CSV to land in the Downloads folder.

    Parameters
    ----------
    start, end : str
        Date strings typed into the search form (MMDDYYYY, per the
        example in __main__ — TODO confirm against the site's format).

    Returns
    -------
    str or None
        Path of the downloaded CSV, or None when the search produced
        no results.
    """
    print('Scraping Accela...')
    start_time = time.time()
    driver = webdriver.Chrome(ChromeDriverManager().install())
    try:
        url = 'https://inspections.grcity.us/CitizenAccess/Cap/CapHome.aspx?module=Enforcement&TabName=Home'
        driver.get(url)
        start_date_id = 'ctl00_PlaceHolderMain_generalSearchForm_txtGSStartDate'
        end_date_id = 'ctl00_PlaceHolderMain_generalSearchForm_txtGSEndDate'
        print(start)
        print(end)
        driver.find_element_by_id(start_date_id).send_keys(start)
        driver.find_element_by_id(end_date_id).send_keys(end)
        click_wait(driver, 'ctl00_PlaceHolderMain_btnNewSearch')
        # Timestamp taken before the export click so wait_for_download
        # only accepts files created by this run.
        download_time = time.time()
        export_id = 'ctl00_PlaceHolderMain_dgvPermitList_gdvPermitList_gdvPermitListtop4btnExport'
        if click_wait(driver, export_id):
            # "Your search returned no results" -- nothing to download.
            return None
        download = wait_for_download(download_time)
        print(time.time() - start_time)
        print(download)
        return download
    finally:
        # Always release the browser: the original leaked the driver on
        # the no-results path and whenever an exception was raised.
        driver.quit()
if __name__ == '__main__':
    # Example run: June 2020 (date strings as typed into the form).
    scrape_accela(start='06012020', end='06302020')
# (End of gist; GitHub sign-up boilerplate removed.)