Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save riyenas0925/b47c3932e96715db80b81881c6524506 to your computer and use it in GitHub Desktop.
Save riyenas0925/b47c3932e96715db80b81881c6524506 to your computer and use it in GitHub Desktop.
EBSi Q&A 파싱
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape the body text of EBSi Q&A articles: open each listed row of the
# Q&A table, read the article's content cell, and collect the text.

# nth-child indices of the table rows whose title link is opened.
korean = [1, 2, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27]
# Text extracted from each visited article, in the order of `korean`.
koreanList = []

# CSS selector for the title link of one listing row; `{row}` is the
# nth-child index of the <tr> inside the Q&A table body.
_ROW_LINK_SELECTOR = (
    '#reNcontents > div.learningCoaching2012 > div.contsArea > form'
    ' > div.clearArea > table > tbody > tr:nth-child({row}) > td.tit > a'
)

# Selenium webdriver. No explicit driver path is passed: Selenium 3 defaults
# to the same "chromedriver" executable name, and newer Selenium 4 releases
# no longer accept a positional path argument.
driver = webdriver.Chrome()
try:
    # Wait up to 3 seconds for elements to appear before a lookup fails.
    driver.implicitly_wait(3)
    # Open the EBSi Q&A listing page.
    driver.get('http://www.ebsi.co.kr/ebs/pot/potu/retrieveQnaArticleList.ebs')
    # Visit each selected row of the Q&A table.
    for i in korean:
        # find_element(By.CSS_SELECTOR, ...) replaces the
        # find_element_by_css_selector helper removed in Selenium 4.
        driver.find_element(
            By.CSS_SELECTOR, _ROW_LINK_SELECTOR.format(row=i)
        ).click()
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # NOTE(review): both replace() arguments render as a plain space
        # here; the original may have stripped two different whitespace
        # characters (e.g. tab / non-breaking space) -- confirm.
        koreanList.append(
            soup.find("td", {"class": "cont"}).text.replace(" ", "").replace(" ", "")
        )
        # Return to the listing so the next row's link can be located.
        driver.back()
finally:
    # Always end the browser session, even if a lookup above raised, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

print(koreanList)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment