Skip to content

Instantly share code, notes, and snippets.

@aijogja
Last active July 9, 2019 07:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aijogja/46aa9d1d810f0159cc9679b2938077f2 to your computer and use it in GitHub Desktop.
Save aijogja/46aa9d1d810f0159cc9679b2938077f2 to your computer and use it in GitHub Desktop.
Selenium Tokopedia Search
selenium==3.141.0
from urllib import parse
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
# Search configuration ("pencarian" is Indonesian for "search").
#   keyword   - text typed into the site's search box.
#   nama_toko - text a product's shop element must equal for the product
#               to be printed.
#   deep      - limit pagination: value of the ``page`` query parameter at
#               which the crawl stops; kept as a string because it is
#               compared against the raw query-string value.
PENCARIAN = dict(
    keyword='keyword',
    nama_toko='shop name',
    deep='10',
)
def main():
    """Search Tokopedia for a keyword and print one shop's product titles.

    Starts Chrome through the local ``./chromedriver``, submits
    ``PENCARIAN['keyword']`` in the search box, then walks the result pages.
    On each page, the ``<h3>`` text of every product whose shop element text
    equals ``PENCARIAN['nama_toko']`` is printed.

    The walk ends when the ``page`` query parameter of the current URL
    equals ``PENCARIAN['deep']`` (that page is announced but not listed), or
    when no ``>`` pagination control is present.

    Raises:
        selenium.common.exceptions.TimeoutException: if no pagination
            element appears within 10 seconds of a page load.
    """
    # NOTE: these options are built but not handed to the driver -- the
    # ``options=`` argument below is commented out.
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    driver = webdriver.Chrome(
        executable_path='./chromedriver',
        # options=chrome_options
    )
    driver.get('https://www.tokopedia.com')
    print('Opening the browser ...')
    searchbox_el = driver.find_element_by_id('search-keyword')
    searchbox_el.send_keys(PENCARIAN['keyword'])
    searchbox_el.submit()
    print('Submit the keyword')

    def pagination_clicker():
        """Walk the result pages, printing matching products on each.

        Implemented as a loop rather than self-recursion so that neither
        the number of pages nor the number of stale-element retries grows
        the call stack, and so that elements of an already-replaced page
        are never iterated again after moving on.
        """
        pagination_located = EC.presence_of_all_elements_located(
            (By.CLASS_NAME, "GUHElpkt")
        )
        while True:
            # Wait for the results; the pagination controls are used as the
            # signal that the page has rendered.
            paginations = WebDriverWait(driver, 10).until(pagination_located)
            # Presumably one element per product card -- the class names
            # are obfuscated, so this is inferred from how they are used.
            products_toko = driver.find_elements_by_class_name('vlEGRFVq')

            url = parse.urlsplit(driver.current_url)
            qs = parse.parse_qs(url.query)
            if 'page' in qs:
                page = qs['page'][0]
                print('Page %s' % page)
                if page == PENCARIAN['deep']:
                    # Depth limit reached: stop before listing this page.
                    print('Selesai')
                    return None
            else:
                # The first result page carries no ``page`` parameter.
                print('Page 1')

            moved_on = False
            try:
                # List products belonging to the wanted shop.
                for prd in products_toko:
                    product_link = prd.find_element_by_class_name('_2rQtYSxg')
                    if product_link.text == PENCARIAN['nama_toko']:
                        product_title = prd.find_element_by_tag_name('h3')
                        print(product_title.text)
                for pagination in paginations:
                    if pagination.text == '>':
                        # Click via JavaScript instead of a native click.
                        driver.execute_script(
                            "arguments[0].click();", pagination
                        )
                        moved_on = True
                        # Everything collected above now belongs to the
                        # previous page; stop touching it.
                        break
            except StaleElementReferenceException:
                # The DOM was replaced underneath us (typically the next
                # page finished loading); re-read the current page.
                continue

            if not moved_on:
                # No ">" control: this was the last page.
                return None

    pagination_clicker()
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment