Created
June 28, 2017 08:44
-
-
Save erika-dike/59d71f3369f54dd90748a529526eb173 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.common.exceptions import ( | |
ElementNotVisibleException | |
) | |
def get_all_download_links(driver, url):
    """Visit *url*, expand the page, and return its JPG download links.

    The page is opened with ``driver.get``. The element with class
    ``loadmore`` is then clicked repeatedly until clicking it raises
    ``ElementNotVisibleException``, which is treated as "nothing left to
    load". Finally ``driver.page_source`` is scanned for ``href`` values
    that start with ``/download/`` and end with ``.jpg``.

    Args:
        driver: Browser driver object. It must provide ``get``,
            ``page_source`` and a ``wait`` attribute with an ``until``
            method (presumably a ``WebDriverWait`` attached by the
            caller; verify against the code that builds the driver).
        url: Address of the page to scrape.

    Returns:
        A list of the matched link paths, in document order, without the
        surrounding ``href="`` and closing quote.
    """
    driver.get(url)
    try:
        # Keep pressing "load more" until the button can no longer be
        # clicked; the exception is the loop's only exit.
        # NOTE(review): if the button is removed from the DOM instead of
        # being hidden, the wait presumably raises a timeout error that is
        # not handled here - confirm whether callers rely on that.
        while True:
            button = driver.wait.until(EC.presence_of_element_located(
                (By.CLASS_NAME, "loadmore")))
            button.click()
    except ElementNotVisibleException:
        pass
    # ``[^"]+`` keeps each match inside a single attribute value; a greedy
    # ``.+`` would swallow everything between the first and the last link
    # whenever several links share one line of the page source. The dot
    # before ``jpg`` is escaped so that it only matches a literal ".".
    return re.findall(
        r'(?<=href=")/download/[^"]+\.jpg(?=")', driver.page_source)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment