@suminb
Last active July 2, 2018 17:17
Demo for a lecture at WISET (2018-05-11)
"""웹브라우저 띄워서 가져오는 예제 (느림)"""
import csv

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Firefox()


def main():
    # Search Google (English UI) for the keyword.
    driver.get('https://www.google.com?hl=en')
    search = driver.find_element_by_xpath("//input[@title='Search']")
    search.send_keys('한국여성과학인지원센터')
    search.send_keys(Keys.RETURN)

    # Pause so the presenter can confirm the results page has loaded,
    # then switch to the News tab.
    input('Move to News section ')
    news = driver.find_element_by_xpath("//a[text()='News']")
    news.click()

    # Collect the headline links, print them, and save them to a CSV file.
    input('Collect titles ')
    titles = driver.find_elements_by_xpath("//div[@class='g']/div/div/h3/a")
    for title in titles:
        print(title.get_attribute('text'))
    save_as_file(titles, 'results.csv')


def save_as_file(results, filename):
    # newline='' keeps the csv module from inserting blank lines on Windows.
    with open(filename, 'w', newline='') as fout:
        writer = csv.writer(
            fout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Title', 'Link'])
        for result in results:
            row = [result.get_attribute('text'), result.get_attribute('href')]
            writer.writerow(row)


if __name__ == '__main__':
    try:
        main()
    finally:
        driver.close()
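
The manual input() prompts above give the presenter time to let each page load. As a design alternative, the following is a minimal sketch of the same flow using Selenium's explicit waits (WebDriverWait) instead of keyboard pauses; the XPath selectors are carried over from the demo and are assumptions about Google's markup at the time, so they may need updating.

# Sketch (not part of the original demo): same flow with explicit waits.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
try:
    driver.get('https://www.google.com?hl=en')
    wait = WebDriverWait(driver, 10)  # wait up to 10 seconds per condition

    # Wait for the search box, then submit the query.
    search = wait.until(
        EC.presence_of_element_located((By.XPATH, "//input[@title='Search']")))
    search.send_keys('한국여성과학인지원센터')
    search.send_keys(Keys.RETURN)

    # Wait until the News tab is clickable, then collect the headline links.
    wait.until(EC.element_to_be_clickable((By.XPATH, "//a[text()='News']"))).click()
    titles = wait.until(EC.presence_of_all_elements_located(
        (By.XPATH, "//div[@class='g']/div/div/h3/a")))
    for title in titles:
        print(title.get_attribute('text'))
finally:
    driver.quit()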
"""아무런 창도 띄우지 않고 바로 가져오는 예제"""
import csv

import requests
from bs4 import BeautifulSoup


def process(keyword, filename):
    # Query Google's news search directly over HTTP, presenting a regular
    # desktop browser via the User-Agent header.
    url = 'https://www.google.co.kr/search'
    params = {
        'tbm': 'nws',       # news search vertical
        'source': 'lnms',
        'prmd': 'inmv',
        'sa': 'X',
        # 'biw': 400,
        # 'bih': 616,
        # 'dpr': 2,
        # 'rlz': '1C5CHFA_enKR728KR730',
        # 'ved': '0ahUKEwjovsKn7oDcAhWBd94KHXaEC3gQ_AUIEigC',
        'q': keyword,
    }
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    resp = requests.get(url, params=params, headers=headers)

    # Parse the HTML and pick out the headline links.
    soup = BeautifulSoup(resp.content, 'html.parser')
    links = soup.select('h3.r a')
    save_as_file(links, filename)


def save_as_file(links, filename):
    # newline='' keeps the csv module from inserting blank lines on Windows.
    with open(filename, 'w', newline='') as fout:
        writer = csv.writer(
            fout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Title', 'Link'])
        for link in links:
            row = [link.text, link.get('href')]
            writer.writerow(row)


if __name__ == '__main__':
    process('한국여성과학기술인지원센터', 'results.csv')
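
Both examples write the same two-column CSV (Title, Link). A quick sketch for reading the generated results.csv back and printing each entry, assuming it was produced by either script above:

import csv

# Read back results.csv and print each collected headline with its link.
with open('results.csv', newline='') as fin:
    reader = csv.reader(fin)
    header = next(reader)  # ['Title', 'Link']
    for title, link in reader:
        print(f'{title} -> {link}')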