Skip to content

Instantly share code, notes, and snippets.

@Hamthoven
Last active March 7, 2023 22:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hamthoven/b35c65339c72fb6ae3f0feb751e4a64b to your computer and use it in GitHub Desktop.
Save Hamthoven/b35c65339c72fb6ae3f0feb751e4a64b to your computer and use it in GitHub Desktop.
크롤링 코드
pip install selenium
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
# Scrape the channel names and their onclick handlers from every category tab
# of the SK Broadband real-time channel list page.

# XPath of the link of the n-th (1-based) category tab.
TAB_LINK_XPATH = '//*[@id="contents"]/div[2]/div[1]/div[2]/ul/li[%d]/a'
# XPath of the channel links inside the panel with id "tabNN" (zero-padded,
# i.e. tab01 .. tab09, tab10 .. tab20).
CHANNEL_LINKS_XPATH = '//*[@id="tab%02d"]/div[1]/ul/li/a'
# Panels tab01 .. tab20 are visited.
TAB_COUNT = 20

driver = selenium.webdriver.Chrome()
chlist = []  # visible text of every channel link, in page order
atts = []    # matching "onclick" attribute of every channel link
try:
    driver.get(url='https://skbroadband.com/content/realtime/Channel_List.do?')
    driver.implicitly_wait(time_to_wait=3)
    # Looking up the first tab up front makes the run fail early (after the
    # implicit wait) if the tab strip never shows up.
    tab = driver.find_element(By.XPATH, TAB_LINK_XPATH % 1)
    for i in range(1, TAB_COUNT + 1):
        ch = driver.find_elements(By.XPATH, CHANNEL_LINKS_XPATH % i)
        chlist += [x.text for x in ch]
        atts += [y.get_attribute("onclick") for y in ch]
        # NOTE(review): panel i is read *before* tab i is activated below.
        # WebElement.text only reports rendered text, so confirm that the
        # panels after the first one do not yield empty names.
        tab = driver.find_element(By.XPATH, TAB_LINK_XPATH % i)
        tab.send_keys(Keys.ENTER)
finally:
    # Always shut the browser down, even when a lookup above raises;
    # nothing after this point needs the live session.
    driver.quit()
import re

# Captures the first two runs of digits in an onclick value. The parsing this
# replaces treated them as (category code, channel number), e.g. the two
# arguments in  javascript:fn_view('1000','123')  -- the exact attribute
# format is assumed from that older parsing; confirm against the live page.
_ONCLICK_ARGS = re.compile(r"(\d+)\D+(\d+)")


def _parse_onclick(onclick):
    """Return ``(category, number)`` as ints from one ``onclick`` value.

    Args:
        onclick: The attribute string collected for a channel link, or
            ``None`` when the link has no ``onclick`` attribute.

    Returns:
        A 2-tuple ``(category, number)`` built from the first and second
        integer found in the string.

    Raises:
        ValueError: If the value is missing or does not contain two integers.
    """
    match = _ONCLICK_ARGS.search(onclick or "")
    if match is None:
        raise ValueError(f"unrecognised onclick value: {onclick!r}")
    category, number = match.groups()
    return int(category), int(number)


# Parse every handler once, then split the pairs into the two parallel lists
# (same order as ``atts``) that the rest of the script expects.
_parsed = [_parse_onclick(x) for x in atts]
genre = [category for category, _ in _parsed]
no = [number for _, number in _parsed]
import json
# Combine the three parallel lists into one record per channel and save them.
# The lists must line up one-to-one; fail loudly instead of silently pairing
# a name with the wrong number/category.
if not (len(chlist) == len(no) == len(genre)):
    raise ValueError(
        "channel lists differ in length: "
        f"names={len(chlist)}, numbers={len(no)}, categories={len(genre)}"
    )

channels = [
    {
        "number": number,
        "name": name,
        "category": category,
        # Written as empty strings by this script.
        "link": "",
        "description": "",
    }
    for number, name, category in zip(no, chlist, genre)
]

# Write UTF-8 explicitly and keep non-ASCII names as real characters rather
# than \uXXXX escapes, so the file does not depend on the platform's default
# encoding and stays readable.
with open("ChannelData.json", "w", encoding="utf-8") as outfile:
    json.dump(channels, outfile, indent=4, ensure_ascii=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment