Skip to content

Instantly share code, notes, and snippets.

@hongdonghyun
Created February 29, 2020 08:00
Show Gist options
  • Save hongdonghyun/26e8e7a0340f8e7be346a2a90be41888 to your computer and use it in GitHub Desktop.
Save hongdonghyun/26e8e7a0340f8e7be346a2a90be41888 to your computer and use it in GitHub Desktop.
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def image_url(attrs):
    """Return the product image URL from an ``<img>`` tag's attributes.

    ``data-original`` is preferred when present, with ``src`` as the
    fallback.  The original author notes that the page structure changes
    over time; printing the tag shows which attribute currently holds the
    URL (``data-original`` is presumably a lazy-loading attribute --
    confirm against the live page).

    Args:
        attrs: Mapping of attribute name to value (e.g. ``Tag.attrs``).

    Returns:
        The value of ``data-original`` if that key exists, else ``src``.

    Raises:
        KeyError: If neither ``data-original`` nor ``src`` is present.
    """
    if "data-original" in attrs:
        return attrs["data-original"]
    return attrs["src"]


def main():
    """Load the Danawa product list, click through the filter controls and
    print name, image URL and price for every non-ad product entry.

    The browser session is always shut down, even when a wait times out or
    parsing fails, so no stray driver process is left behind.
    """
    browser = webdriver.Chrome('./chromedriver')
    try:
        browser.implicitly_wait(5)
        browser.get("http://prod.danawa.com/list/?cate=112758&15main_11_02")

        # Controls clicked in order.  Judging by the element ids these
        # presumably open the option panel, expand the maker list and tick
        # the 13th maker -- verify against the live page.
        click_targets = (
            '//*[@id="frmProductList"]/div/div[1]/div[2]/button',
            '// *[ @ id = "dlMaker_extend"] / dd / div[2] / button[1]',
            '//*[@id="selectMaker_extend_priceCompare_A"]/li[13]/label',
        )
        for xpath in click_targets:
            # Wait until the element is actually clickable (visible and
            # enabled), not merely present in the DOM, before clicking it.
            WebDriverWait(browser, 3).until(
                EC.element_to_be_clickable((By.XPATH, xpath))).click()

        # Fixed pause kept from the original: gives the page time to
        # update after the last click before the HTML is captured.
        time.sleep(4)

        soup = BeautifulSoup(browser.page_source, 'html.parser')
        items = soup.select("div.main_prodlist.main_prodlist_list > ul > li")
        for item in items:
            # Entries carrying an "ad_header" div are skipped.
            if item.find('div', class_="ad_header"):
                continue

            name_link = item.select_one('p.prod_name > a')
            img_tag = item.select_one('a.thumb_link > img')
            price_link = item.select_one('p.price_sect > a')
            # Skip list items that lack any expected node instead of
            # crashing on them.
            if name_link is None or img_tag is None or price_link is None:
                continue

            print(name_link.text.strip())
            # Debugging hint: print(img_tag) here to see how the markup has
            # changed.  Page structure keeps changing, so the scraping
            # logic has to be adapted to whatever the current structure is.
            print(image_url(img_tag.attrs))
            print(price_link.text)
            print()
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        browser.quit()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment