Skip to content

Instantly share code, notes, and snippets.

@youtube-jocoding
Created March 8, 2023 06:38
Show Gist options
  • Save youtube-jocoding/76ff9da0c053ff557ffb26efeecc60bf to your computer and use it in GitHub Desktop.
Save youtube-jocoding/76ff9da0c053ff557ffb26efeecc60bf to your computer and use it in GitHub Desktop.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Selenium 4 takes the chromedriver location through a Service object;
# imported here so this setup section stays self-contained.
from selenium.webdriver.chrome.service import Service

options = webdriver.ChromeOptions()
# Exclude Chrome's "enable-logging" switch (presumably to keep the console quiet).
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# The positional driver-path argument was deprecated in Selenium 4 and removed
# in 4.10, so the path is wrapped in a Service instead.
driver = webdriver.Chrome(service=Service("C:/kefico/chromedriver.exe"), options=options)

url = "https://mediahub.seoul.go.kr/news/issue/hotNewsList.do"
driver.get(url)
time.sleep(2)  # fixed pause after requesting the list page

# nth-child positions of the pagination links that nextPage() clicks, in order.
pages = [2, *range(4, 12)]
# Position in `pages` of the next link to click; nextPage() advances it.
index = 0
def nextPage(n):
    """Click the n-th pagination link and advance the global page counter.

    ``n`` is the nth-child position of the ``li`` under
    ``#news_List > div > ul`` whose link gets clicked. Fixed two-second
    pauses follow the lookup and the click.
    """
    global index

    # Scroll all the way down before locating the link.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    selector = f"#news_List > div > ul > li:nth-child({n}) > a"
    page_link = driver.find_element(By.CSS_SELECTOR, selector)
    time.sleep(2)
    page_link.click()
    time.sleep(2)

    index += 1
# Accumulators filled page by page; entries at the same position belong together.
titles = []
summarys = []
links = []

try:
    while True:
        news_list = driver.find_element(By.CSS_SELECTOR, "#news_List")

        # Titles: raw innerHTML with newlines and surrounding whitespace removed.
        titles.extend(
            tag.get_attribute("innerHTML").replace("\n", "").strip()
            for tag in news_list.find_elements(By.CSS_SELECTOR, ".tit")
        )
        # Summaries: drop the template's HTML comment and turn &nbsp; into spaces.
        summarys.extend(
            tag.get_attribute("innerHTML")
            .replace("<!-- max 3줄 -->", "")
            .replace("&nbsp;", " ")
            .strip()
            for tag in news_list.find_elements(By.CSS_SELECTOR, ".summary")
        )
        # Links: the first single-quoted argument of the onclick handler is
        # appended to the archive URL (presumably the article id).
        links.extend(
            "https://mediahub.seoul.go.kr/archives/" + tag.get_attribute("onclick").split("'")[1]
            for tag in news_list.find_elements(By.CSS_SELECTOR, ".goArticleDetail")
        )

        # Scrape first, navigate second. The earlier `while index < len(pages)`
        # form stopped right after the final click, so the last page was
        # loaded but never read.
        if index >= len(pages):
            break
        nextPage(pages[index])  # clicks the link and increments the global index
finally:
    # quit() ends the whole driver session; close() only closes the window.
    driver.quit()

# `with` closes the file even if a write fails. zip() stops at the shortest
# list, so unequal list lengths can no longer raise IndexError mid-file.
with open("seoulNews.txt", "w", encoding="utf-8") as f:
    for title, summary, link in zip(titles, summarys, links):
        f.write(f"제목: {title}\n요약:{summary}\n링크:{link}\n\n")
@youtube-jocoding
Copy link
Author

youtube-jocoding commented Mar 8, 2023

from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Selenium 4 takes the chromedriver location through a Service object;
# imported here so this setup section stays self-contained.
from selenium.webdriver.chrome.service import Service

options = webdriver.ChromeOptions()
# Exclude Chrome's "enable-logging" switch (presumably to keep the console quiet).
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# The positional driver-path argument was deprecated in Selenium 4 and removed
# in 4.10, so the path is wrapped in a Service instead.
driver = webdriver.Chrome(service=Service("C:/kefico/chromedriver.exe"), options=options)

url = "https://mediahub.seoul.go.kr/news/issue/hotNewsList.do"
driver.get(url)
time.sleep(2)  # fixed pause after requesting the list page

# nth-child positions of the pagination links that nextPage() clicks, in order.
# Example of the selector these positions are substituted into:
#   #news_List > div > ul > li:nth-child(11) > a
pages = [2, *range(4, 12)]

# Position in `pages` of the next link to click; nextPage() advances it.
index = 0
def nextPage(n):
    """Click the n-th pagination link and advance the global page counter.

    ``n`` is the nth-child position of the ``li`` under
    ``#news_List > div > ul`` whose link gets clicked. One-second pauses
    separate the scroll, the lookup, and the click.
    """
    global index

    # Scroll all the way down before locating the link.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)

    selector = f"#news_List > div > ul > li:nth-child({n}) > a"
    page_link = driver.find_element(By.CSS_SELECTOR, selector)
    time.sleep(1)

    page_link.click()
    time.sleep(1)

    index += 1

# Accumulators filled page by page; entries at the same position belong together.
titles = []
summarys = []
links = []

try:
    while True:
        news_list = driver.find_element(By.CSS_SELECTOR, "#news_List")

        # Titles: raw innerHTML with newlines and surrounding whitespace removed.
        titles.extend(
            tag.get_attribute("innerHTML").replace("\n", "").strip()
            for tag in news_list.find_elements(By.CSS_SELECTOR, ".tit")
        )
        # Summaries: drop the template's HTML comment and turn &nbsp; into spaces.
        summarys.extend(
            tag.get_attribute("innerHTML")
            .replace("<!-- max 3줄 -->", "")
            .replace("&nbsp;", " ")
            .strip()
            for tag in news_list.find_elements(By.CSS_SELECTOR, ".summary")
        )
        # Links: the first single-quoted argument of the onclick handler is
        # appended to the archive URL (presumably the article id).
        links.extend(
            "https://mediahub.seoul.go.kr/archives/" + tag.get_attribute("onclick").split("'")[1]
            for tag in news_list.find_elements(By.CSS_SELECTOR, ".goArticleDetail")
        )

        # Every page, including the one reached by the final click, is scraped
        # before this check, so the loop ends once no pagination link is left.
        if index >= len(pages):
            break
        nextPage(pages[index])  # clicks the link and increments the global index
finally:
    # quit() ends the whole driver session; close() only closes the window.
    driver.quit()

print(len(titles))

# `with` closes the file even if a write fails. zip() stops at the shortest
# list, so unequal list lengths can no longer raise IndexError mid-file.
with open("seoulNews.txt", "w", encoding="utf-8") as f:
    for title, summary, link in zip(titles, summarys, links):
        f.write(f"제목: {title}\n요약:{summary}\n링크:{link}\n\n")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment