Skip to content

Instantly share code, notes, and snippets.

@ioness
Created October 18, 2018 16:27
Show Gist options
  • Save ioness/f548552c2a29754b5a2085cf83ffb845 to your computer and use it in GitHub Desktop.
Save ioness/f548552c2a29754b5a2085cf83ffb845 to your computer and use it in GitHub Desktop.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def data(driver, links):
    """Build one record per profile link.

    Args:
        driver: browser driver object, passed through to ``get`` unchanged.
        links: iterable of profile URLs.

    Returns:
        A list holding the result of ``get(driver, link)`` for every link,
        in the order the links were given.
    """
    return [get(driver, link) for link in links]
def get(driver, link):
    """Load a profile page and extract its fields into a flat record.

    The record has seven string slots; a slot stays ``''`` when the matching
    element is not present on the page:

        0. name (first line of the heading text)
        1. type and year
        2. e-mail (the part of the link's ``href`` after the first ``:``)
        3. nationalities
        4. school/program
        5. degree
        6. graduation (up to the first two ``<b>`` values, space-separated)

    Args:
        driver: driver object providing ``get`` and ``find_elements_by_xpath``.
        link: URL of the profile page.

    Returns:
        The seven-element list described above, or ``[]`` when loading or
        scraping the page raised an exception.
    """
    # Label found in an "ico_other" row -> slot that receives its first <b>.
    labelled_slots = (
        ('Nationalities:', 3),
        ('School/Program:', 4),
        ('Degree:', 5),
    )
    record = [''] * 7
    try:
        driver.get(link)

        # name
        found = driver.find_elements_by_xpath('//*[@id="maincol"]/div[1]/div[2]/div[2]/b')
        if found:
            record[0] = found[0].text.split('\n')[0]

        # type and year
        found = driver.find_elements_by_xpath('//*[@id="maincol"]/div[1]/div[2]/div[2]/b/span')
        if found:
            record[1] = found[0].text

        # email
        found = driver.find_elements_by_xpath('//*[@id="personal_info"]//li[@class="ico_email"]/b/a')
        if found:
            # A missing href or one without a scheme separator must not
            # discard the whole record, so only fill the slot when the
            # "scheme:address" shape is actually there.
            href = found[0].get_attribute('href') or ''
            _, separator, address = href.partition(':')
            if separator:
                record[2] = address

        # ico_other
        for item in driver.find_elements_by_xpath('//*[@id="personal_info"]//li[@class="ico_other"]'):
            # Read the row text once; every access is a round trip to the browser.
            text = item.text
            slots = [slot for label, slot in labelled_slots if label in text]
            is_graduation = 'Graduation:' in text
            if not slots and not is_graduation:
                continue
            values = item.find_elements_by_xpath('./b')
            if not values:
                # Row carries a known label but no value; leave the slot blank.
                continue
            for slot in slots:
                record[slot] = values[0].text
            if is_graduation:
                # Slicing keeps a row with a single <b> from raising IndexError.
                record[6] = ' '.join(value.text for value in values[:2])
    except Exception:
        # Best effort: a page that cannot be scraped yields an empty record.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return []
    return record
from selenium import webdriver
def driver():
    """Create the Chrome WebDriver used by the scraper.

    Returns:
        The object returned by ``webdriver.Chrome``, built with the
        chromedriver binary at ``/path/to/chromedriver`` and the
        ``--incognito`` switch.
    """
    option = webdriver.ChromeOptions()
    # Chrome switches start with two ASCII hyphens and no surrounding spaces.
    # The earlier value used a typographic dash (' — incognito'), which Chrome
    # does not recognise as a switch.
    option.add_argument('--incognito')
    return webdriver.Chrome(executable_path='/path/to/chromedriver', chrome_options=option)
import csv
def exp(data, path='/foo/g19.csv'):
    """Write the scraped rows to a CSV file.

    Args:
        data: iterable of rows, each row an iterable of field values.
        path: destination file. Defaults to the location that used to be
            hard-coded, so existing callers are unaffected.
    """
    # utf-8 keeps non-ASCII field values from failing on platforms whose
    # default encoding cannot represent them; newline='' lets the csv module
    # control line endings, as its documentation requires.
    with open(path, "w", encoding="utf-8", newline="") as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(data)
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
import time
def list(driver, max_links=9000, step_timeout=1000):
    """Open the first directory entry and collect profile links from the modal.

    After the first entry is clicked, the modal's profile link is read, the
    "next" button is clicked, and the function waits until the link's
    ``href`` changes before reading it again.

    Args:
        driver: Selenium WebDriver positioned on the directory page.
        max_links: upper bound on the number of links collected (previously
            a hard-coded 9000).
        step_timeout: seconds to wait for the modal to show a different
            profile after each click (previously a hard-coded 1000).

    Returns:
        The collected ``href`` values in visiting order. Collection stops
        early, keeping everything gathered so far, when the modal stops
        changing or the "next" button is no longer present.
    """
    from selenium.common.exceptions import TimeoutException

    first_entry_xpath = '//tbody[@id="divAllItems"]/div[1]/div/div[1]/div[1]/div[2]/h4/a'
    profile_xpath = '//*[@id="primary-modal"]/div/div/div/div[1]/div[2]/div[2]/div/a'
    next_button_xpath = '//*[@id="profile-btn--next"]'

    WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.XPATH, first_entry_xpath)))
    driver.find_element_by_xpath(first_entry_xpath).click()

    WebDriverWait(driver, 10000).until(
        EC.visibility_of_element_located((By.XPATH, profile_xpath)))
    driver.execute_script("document.getElementById('primary-modal').scrollTo(0, document.getElementById('primary-modal').scrollHeight)")

    links = []
    previous_href = None
    ignored_exceptions = (NoSuchElementException, StaleElementReferenceException,)
    while len(links) < max_links:
        try:
            WebDriverWait(driver, step_timeout, ignored_exceptions=ignored_exceptions).until(
                element_is_new((By.XPATH, profile_xpath), previous_href))
        except TimeoutException:
            # The modal never moved on to a different profile: treat this as
            # the end of the directory instead of discarding collected links.
            break
        current_href = driver.find_element_by_xpath(profile_xpath).get_attribute('href')
        links.append(current_href)
        try:
            driver.find_element_by_xpath(next_button_xpath).click()
        except NoSuchElementException:
            # No "next" button: nothing further to visit.
            break
        previous_href = current_href
    return links
class element_is_new(object):
    """Wait condition: the located element's ``href`` differs from a reference.

    Args:
        locator: ``(by, value)`` pair unpacked into ``driver.find_element``.
        href: the reference value to compare the element's ``href`` against.
    """

    def __init__(self, locator, href):
        self.locator, self.href = locator, href

    def __call__(self, driver):
        """Return the element when its ``href`` differs from the reference, else ``False``."""
        candidate = driver.find_element(*self.locator)
        changed = self.href != candidate.get_attribute("href")
        return candidate if changed else False
from mod.driver import driver
from mod.pre import pre
from mod.list import list
from mod.scroll import scroll
from mod.data import data
from mod.exp import exp
# Script entry point: start the browser, log in, scroll the page, collect the
# profile links, scrape each profile and export the rows.
browser = None
try:
    # Results get their own names so the imported functions `driver`, `list`
    # and `data` are not overwritten by their return values.
    browser = driver()
    pre(browser)
    scroll(browser)
    links = list(browser)
    rows = data(browser, links)
    exp(rows)
except Exception as e:
    print(e)
    print('Hello, exception')
finally:
    # Always shut the browser down so no Chrome/chromedriver process is left
    # running, whether the run succeeded or failed.
    if browser is not None:
        browser.quit()
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def pre(driver, username='usuario', password='contrasena'):
    """Sign in through the login form and open the post-login page.

    Steps: open the start URL, follow the login link found inside the
    ``ctnLogin`` block, fill in the user name and password inputs, submit
    the form, then navigate to the second URL.

    Args:
        driver: driver object providing ``get``, ``find_element_by_xpath``
            and ``find_element_by_id``.
        username: value typed into the ``userNameInput`` field. Defaults to
            the placeholder that used to be hard-coded.
        password: value typed into the ``passwordInput`` field. Defaults to
            the placeholder that used to be hard-coded.
    """
    driver.get('https://foo_url')
    login_page_link = driver.find_element_by_xpath('//div[@class="ctnLogin"]/p[@class="center"]/a')
    driver.get(login_page_link.get_attribute('href'))

    driver.find_element_by_id('userNameInput').send_keys(username)
    driver.find_element_by_id('passwordInput').send_keys(password)
    driver.find_element_by_id('submitButton').submit()

    driver.get('https://foo_url2')
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
def scroll(driver):
    """Scroll to the bottom of the page until its height stops changing.

    After each scroll the function waits (up to 20 seconds) until the text
    content of ``//div[@id="generic"]`` is no longer exactly
    ``'Loading Directory...'``, then scrolls again and compares the reported
    ``document.body.scrollHeight`` with the previous one. Once two successive
    heights match, the page is scrolled back to the top and the function
    sleeps for five seconds.

    Args:
        driver: Selenium WebDriver showing the page to scroll.
    """
    scroll_to_bottom = "window.scrollTo(0, document.body.scrollHeight);return document.body.scrollHeight;"

    previous_height = 0
    current_height = driver.execute_script(scroll_to_bottom)
    while previous_height != current_height:
        loading_finished = element_has_not_text(
            (By.XPATH, '//div[@id="generic"]'), 'Loading Directory...')
        WebDriverWait(driver, 20).until(loading_finished)
        previous_height, current_height = current_height, driver.execute_script(scroll_to_bottom)

    driver.execute_script("window.scrollTo(0, 0);return 1;")
    time.sleep(5)
class element_has_not_text(object):
    """Wait condition: the located element's text content differs from a given text.

    Args:
        locator: ``(by, value)`` pair unpacked into ``driver.find_element``.
        text: the exact ``textContent`` value the element must *not* have.
    """

    def __init__(self, locator, text):
        self.locator, self.text = locator, text

    def __call__(self, driver):
        """Return the element when its ``textContent`` differs from ``text``, else ``False``."""
        candidate = driver.find_element(*self.locator)
        differs = self.text != candidate.get_attribute("textContent")
        return candidate if differs else False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment