Skip to content

Instantly share code, notes, and snippets.

@shafayeatsumit
Created September 21, 2017 08:04
Show Gist options
  • Save shafayeatsumit/d3d982265c16af63810e4ad522b1a3ec to your computer and use it in GitHub Desktop.
Save shafayeatsumit/d3d982265c16af63810e4ad522b1a3ec to your computer and use it in GitHub Desktop.
from selenium import webdriver
import time
import re
import math
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Open a Chrome session through the WebDriver endpoint at 127.0.0.1:4444.
driver = webdriver.Remote(
    command_executor='http://127.0.0.1:4444/wd/hub',
    desired_capabilities=DesiredCapabilities.CHROME,
)
# Alternative kept from the original: drive a local Firefox instead.
#driver = webdriver.Firefox()

# Listing page that the scraping code below loads and keeps returning to.
root_url = "https://eservice.hsa.gov.sg/prism/ct_r/enquiry.do?action=getAllTherapeuticArea"
driver.get(root_url)
def find_number_of_items(driver, base_url=None, delay=10):
    """Read the count shown on every category page linked from the listing.

    For each link in the ``fmTbl`` table of the page the driver is currently
    on: click it, wait ``delay`` seconds, take the first run of digits from
    the pager cell, then load ``base_url`` again to get back to the listing.

    Args:
        driver: WebDriver-like object, already showing the listing page.
        base_url: URL loaded after each category. Defaults to the
            module-level ``root_url``.
        delay: Seconds to sleep after clicking a category link.

    Returns:
        A list of ints, one per category link, in page order. Presumably
        each is the number of items in that category -- confirm against the
        site, the code only knows it is the first integer in the pager cell.

    Raises:
        ValueError: If a category's pager cell contains no digits.
    """
    links_xpath = "//table[@class='fmTbl']/tbody/tr/td/a"
    counter_xpath = '//*[@id="page"]/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[1]'
    if base_url is None:
        base_url = root_url

    item_number_list = []
    total_links = len(driver.find_elements_by_xpath(links_xpath))
    print ("page count%s"%total_links)
    # An index loop is deliberate here: the page is reloaded on every pass,
    # so the links have to be looked up again each time instead of reusing
    # element references found before the navigation.
    for position in range(total_links):
        driver.find_elements_by_xpath(links_xpath)[position].click()
        time.sleep(delay)
        print ("sleeping")
        counter_text = driver.find_element_by_xpath(counter_xpath).text
        match = re.search(r'\d+', counter_text)
        if match is None:
            # Fail with the offending text instead of an AttributeError on None.
            raise ValueError(
                "no count found for category %d in pager text %r"
                % (position, counter_text)
            )
        item_number_list.append(int(match.group()))
        driver.get(base_url)
    print (item_number_list)
    return item_number_list
# Per-category counts used to drive the loop below; position i is paired with
# the i-th category link on the root page. Hard-coded (presumably from an
# earlier run); uncomment the next line to recompute them.
#lis = find_number_of_items(driver)
lis = [7, 25, 13, 3, 17, 2, 8, 17, 12, 17, 4, 38, 24, 7, 264, 16, 4, 0, 5, 5, 10, 3, 5, 9, 4]

# Links inside a listing table, and the pager's "[next]" link.
LINKS_XPATH = "//table[@class='fmTbl']/tbody/tr/td/a"
NEXT_PAGE_XPATH = "//*[@id='page']/form/table[3]/tbody/tr/td/table[4]/tbody/tr/td[2]/a[contains(text(), '[next]')]"

# Rows per listing page. 9 is the divisor the original pagination arithmetic
# used throughout -- TODO confirm against the live site.
ROWS_PER_PAGE = 9

# enumerate() supplies the category position directly. Looking it up with
# lis.index(val) returns the FIRST category holding that count, so categories
# whose count repeats (7, 17, 4, 3, 5 all do) were never opened.
for indx, val in enumerate(lis):
    for row in range(val):
        print ("row number",row)
        # Every visit starts from the root page, so nothing depends on where
        # the previous item (or an empty category) left the browser.
        driver.get(root_url)
        driver.find_elements_by_xpath(LINKS_XPATH)[indx].click()
        time.sleep(10)

        # Split the running row number into "pages to skip" and "row on that
        # page" in one step; the loop variable itself is left untouched.
        pages_ahead, offset = divmod(row, ROWS_PER_PAGE)
        if pages_ahead:
            print ("nxt page count",pages_ahead)
        for _ in range(pages_ahead):
            driver.find_element_by_xpath(NEXT_PAGE_XPATH).click()
            time.sleep(10)

        driver.find_elements_by_xpath(LINKS_XPATH)[offset].click()
        time.sleep(10)
        #extract data here
    print ("scraped amount",val)

# Leave the browser on the root page, as the original did after its last item.
driver.get(root_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment