youtube-jocoding/day3-4_final.py

## day3-4_final.py
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# Configure Chrome and open the hotNewsList page on mediahub.seoul.go.kr.
options = webdriver.ChromeOptions()
# Exclude Chrome's "enable-logging" switch (commonly done to quiet console
# log noise from the browser/driver -- NOTE(review): confirm that is the intent).
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Selenium 4.10+ no longer accepts the chromedriver path as a positional /
# `executable_path` argument of webdriver.Chrome; passing it positionally
# collides with `options=`. The path is supplied through a Service object.
driver = webdriver.Chrome(
    service=Service("C:/kefico/chromedriver.exe"),
    options=options,
)
url = "https://mediahub.seoul.go.kr/news/issue/hotNewsList.do"
driver.get(url)
# Fixed pause after navigation; presumably gives the page time to render.
time.sleep(2)

# Values substituted, one per page turn, into the `li:nth-child(...)` selector
# used by nextPage(): the string '2' first, then the integers 4 through 11.
pages = ['2'] + list(range(4, 12))

# Position in `pages` of the next pager entry to click; nextPage() advances it.
index = 0
def nextPage(n):
    """Click the pager link in list position ``n`` and advance ``index``.

    Scrolls the window to the bottom of the document, looks up the anchor
    inside the ``n``-th ``<li>`` of the pager under ``#news_List``, waits two
    seconds, clicks it, waits two more seconds, and finally increments the
    module-level ``index`` counter by one.

    Args:
        n: Value placed into the ``li:nth-child(...)`` CSS selector
            (a string or an integer).
    """
    global index

    # Scroll all the way down to the end of the page
    # (presumably so the pager link is in view before it is clicked).
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    pager_link = driver.find_element(
        By.CSS_SELECTOR,
        f"#news_List > div > ul > li:nth-child({n}) > a",
    )
    time.sleep(2)
    pager_link.click()
    time.sleep(2)

    index += 1

# Accumulators for the scraped fields; each page's entries are appended in turn.
titles = []
summarys = []
links = []

# Scrape the page currently displayed, then click through to the next one.
# nextPage() increments the global `index`, which is what ends this loop.
while index < len(pages):
    news_List = driver.find_element(By.CSS_SELECTOR, "#news_List")
    titleTags = news_List.find_elements(By.CSS_SELECTOR, ".tit")
    summaryTags = news_List.find_elements(By.CSS_SELECTOR, ".summary")
    linkTags = news_List.find_elements(By.CSS_SELECTOR, ".goArticleDetail")

    # Titles: innerHTML with newlines removed and outer whitespace trimmed.
    titles.extend(
        node.get_attribute("innerHTML").replace("\n", "").strip()
        for node in titleTags
    )

    # Summaries: drop the template comment marker and turn the `&nbsp;`
    # entity into a plain space before trimming.
    summarys.extend(
        node.get_attribute("innerHTML")
        .replace("<!-- max 3줄 -->", "")
        .replace("&nbsp;", " ")
        .strip()
        for node in summaryTags
    )

    # Links: the text between the first pair of single quotes in the element's
    # `onclick` attribute is appended to the archive URL prefix.
    links.extend(
        "https://mediahub.seoul.go.kr/archives/"
        + node.get_attribute("onclick").split("'")[1]
        for node in linkTags
    )

    nextPage(pages[index])

# Write one "title / summary / link" record per article to seoulNews.txt.
# The `with` block closes (and flushes) the file even if a write raises.
with open("seoulNews.txt", 'w', encoding="utf-8") as f:
    # The three lists are paired by position; zip stops at the shortest list.
    for title, summary, link in zip(titles, summarys, links):
        f.write(f"제목: {title}\n요약:{summary}\n링크:{link}\n\n")

# quit() ends the WebDriver session and shuts down the chromedriver process;
# close() would only close the current browser window.
driver.quit()
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	import time
	# Configure Chrome and open the hotNewsList page on mediahub.seoul.go.kr.
	options = webdriver.ChromeOptions()
	# Exclude Chrome's "enable-logging" switch (commonly done to quiet console
	# log noise from the browser/driver -- NOTE(review): confirm that is the intent).
	options.add_experimental_option("excludeSwitches", ["enable-logging"])
	# Selenium 4.10+ no longer accepts the chromedriver path as a positional /
	# `executable_path` argument of webdriver.Chrome; passing it positionally
	# collides with `options=`. The path is supplied through a Service object.
	driver = webdriver.Chrome(
		service=Service("C:/kefico/chromedriver.exe"),
		options=options,
	)
	url = "https://mediahub.seoul.go.kr/news/issue/hotNewsList.do"
	driver.get(url)
	# Fixed pause after navigation; presumably gives the page time to render.
	time.sleep(2)

	# Values substituted, one per page turn, into the `li:nth-child(...)` selector
	# used by nextPage(): the string '2' first, then the integers 4 through 11.
	pages = ['2'] + list(range(4, 12))

	# Position in `pages` of the next pager entry to click; nextPage() advances it.
	index = 0
	def nextPage(n):
		"""Click the pager link in list position ``n`` and advance ``index``.

		Scrolls the window to the bottom of the document, looks up the anchor
		inside the ``n``-th ``<li>`` of the pager under ``#news_List``, waits two
		seconds, clicks it, waits two more seconds, and finally increments the
		module-level ``index`` counter by one.

		Args:
			n: Value placed into the ``li:nth-child(...)`` CSS selector
				(a string or an integer).
		"""
		global index

		# Scroll all the way down to the end of the page
		# (presumably so the pager link is in view before it is clicked).
		driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

		pager_link = driver.find_element(
			By.CSS_SELECTOR,
			f"#news_List > div > ul > li:nth-child({n}) > a",
		)
		time.sleep(2)
		pager_link.click()
		time.sleep(2)

		index += 1

	# Accumulators for the scraped fields; each page's entries are appended in turn.
	titles = []
	summarys = []
	links = []

	# Scrape the page currently displayed, then click through to the next one.
	# nextPage() increments the global `index`, which is what ends this loop.
	while index < len(pages):
		news_List = driver.find_element(By.CSS_SELECTOR, "#news_List")
		titleTags = news_List.find_elements(By.CSS_SELECTOR, ".tit")
		summaryTags = news_List.find_elements(By.CSS_SELECTOR, ".summary")
		linkTags = news_List.find_elements(By.CSS_SELECTOR, ".goArticleDetail")

		# Titles: innerHTML with newlines removed and outer whitespace trimmed.
		titles.extend(
			node.get_attribute("innerHTML").replace("\n", "").strip()
			for node in titleTags
		)

		# Summaries: drop the template comment marker and turn the `&nbsp;`
		# entity into a plain space before trimming. innerHTML serialises
		# non-breaking spaces as the literal text "&nbsp;", so that entity is
		# what must be matched (replacing " " with " " would be a no-op).
		summarys.extend(
			node.get_attribute("innerHTML")
			.replace("<!-- max 3줄 -->", "")
			.replace("&nbsp;", " ")
			.strip()
			for node in summaryTags
		)

		# Links: the text between the first pair of single quotes in the element's
		# `onclick` attribute is appended to the archive URL prefix.
		links.extend(
			"https://mediahub.seoul.go.kr/archives/"
			+ node.get_attribute("onclick").split("'")[1]
			for node in linkTags
		)

		nextPage(pages[index])

	# Write one "title / summary / link" record per article to seoulNews.txt.
	# The `with` block closes (and flushes) the file even if a write raises.
	with open("seoulNews.txt", 'w', encoding="utf-8") as f:
		# The three lists are paired by position; zip stops at the shortest list.
		for title, summary, link in zip(titles, summarys, links):
			f.write(f"제목: {title}\n요약:{summary}\n링크:{link}\n\n")

	# quit() ends the WebDriver session and shuts down the chromedriver process;
	# close() would only close the current browser window.
	driver.quit()