Skip to content

Instantly share code, notes, and snippets.

@moaikim

moaikim/main.py Secret

Created November 27, 2020 21:53
Show Gist options
  • Save moaikim/cc0135edd3180b406256f78764ff1e08 to your computer and use it in GitHub Desktop.
Save moaikim/cc0135edd3180b406256f78764ff1e08 to your computer and use it in GitHub Desktop.
Related Keywords - Crawling
import argparse
import time
from search_manager import SearchManager
def search(search_manager, where, keyword):
    """Look up related keywords for *keyword* on the chosen search engine.

    Args:
        search_manager: Object exposing ``searchGoogleRelatedKeywords``,
            ``searchNaverRelatedKeywords`` and ``searchDaumRelatedKeywords``.
        where: Engine selector: ``'google'``, ``'naver'`` or ``'daum'``.
        keyword: Query string handed to the engine-specific method.

    Returns:
        Whatever the selected method returns, or an empty list when *where*
        names none of the known engines.
    """
    # (engine, method name) pairs; only the matching method is looked up.
    engine_methods = (
        ('google', 'searchGoogleRelatedKeywords'),
        ('naver', 'searchNaverRelatedKeywords'),
        ('daum', 'searchDaumRelatedKeywords'),
    )
    for engine, method_name in engine_methods:
        if where == engine:
            return getattr(search_manager, method_name)(keyword)
    return []
def main():
    """Command-line entry point: search once, then idle until Ctrl+C.

    Parses the two required positional arguments (``where`` and ``keyword``),
    runs a single related-keyword search, prints the result and then keeps
    the process (and therefore the browser window) alive until the user
    interrupts it. The browser is closed on every exit path.
    """
    parser = argparse.ArgumentParser()
    # ``where`` is a required positional, so argparse would never apply a
    # ``default=`` here; none is given to avoid suggesting otherwise.
    parser.add_argument('where', type=str, choices=['google', 'naver', 'daum'], help="Choose where to search")
    parser.add_argument('keyword', type=str, help="Enter keywords to crawl")
    args = parser.parse_args()

    # Bound before the try block so the cleanup below never touches an
    # unassigned name when the interrupt arrives during browser start-up.
    search_manager = None
    try:
        search_manager = SearchManager()
        result = search(search_manager, args.where, args.keyword)
        print(result)
        # Keep the browser open until the user presses Ctrl+C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to end the program; exit quietly.
        pass
    finally:
        # Release the browser on errors too, not only on Ctrl+C.
        if search_manager is not None:
            search_manager.close()


if __name__ == "__main__":
    main()
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
class SearchManager():
    """Drive a Chrome browser to scrape related-search keywords.

    Each public ``search*RelatedKeywords`` method opens the engine's home
    page, types the query, submits it and reads the related-keyword elements
    from the result page. All element lookups use hard-coded XPaths; when an
    element cannot be found in time the method returns an empty list instead
    of raising.
    """

    def __init__(self, timeout=1):
        """Start Chrome, create the explicit wait and maximize the window.

        Args:
            timeout: Seconds an explicit wait may block before giving up.
                Defaults to 1, the value that was previously hard-coded.
        """
        self.driver = Chrome()
        self.wait = WebDriverWait(self.driver, timeout)
        self.driver.maximize_window()

    def _move_end(self):
        """Scroll the current page to the bottom."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    def _wait_visible(self, xpath):
        """Return the elements matching *xpath* once all of them are visible.

        Raises a Selenium exception (e.g. on timeout) instead of returning
        an empty result; callers translate that into their failure value.
        """
        return self.wait.until(ec.visibility_of_all_elements_located((By.XPATH, xpath)))

    def _click(self, xpath):
        """Click the element at *xpath*; return True on success, else False."""
        try:
            # The wait hands back the clickable element, so no second lookup
            # (and no removed ``find_element_by_xpath`` call) is needed.
            self.wait.until(ec.element_to_be_clickable((By.XPATH, xpath))).click()
        except WebDriverException:
            # Only Selenium failures mean "could not click"; Ctrl+C and
            # programming errors must not be swallowed here.
            return False
        return True

    def _insert(self, xpath, value):
        """Type *value* into the first element at *xpath*.

        Returns:
            True when the keys were sent, False on any Selenium failure.
        """
        try:
            self._wait_visible(xpath)[0].send_keys(value)
        except WebDriverException:
            return False
        return True

    def _read(self, xpath):
        """Return the whitespace-split text of the first element at *xpath*.

        Returns an empty list on any Selenium failure.
        """
        try:
            return self._wait_visible(xpath)[0].text.split()
        except WebDriverException:
            return []

    def _reads(self, xpath):
        """Return the text of every element matching *xpath*.

        Returns an empty list on any Selenium failure.
        """
        try:
            return [element.text for element in self._wait_visible(xpath)]
        except WebDriverException:
            return []

    def start(self, url):
        """Navigate the browser to *url*."""
        self.driver.get(url)

    def close(self):
        """Shut the browser down and end the WebDriver session.

        ``quit()`` is used instead of ``close()``: ``close()`` only closes
        the current window, which can leave the driver session running.
        """
        self.driver.quit()

    def _submit_query(self, url, input_xpath, button_xpath, query):
        """Open *url*, type *query* and press the search button.

        Returns:
            True when both the typing and the click succeeded. The click is
            not attempted when the typing failed.
        """
        self.start(url)
        return self._insert(input_xpath, query) and self._click(button_xpath)

    def searchGoogleRelatedKeywords(self, query):
        """Return Google's related-search keywords for *query*.

        Returns an empty list when the search box or button cannot be used,
        or when no related-keyword elements are found.
        """
        submitted = self._submit_query(
            'https://www.google.com',
            '//*[@id="tsf"]/div[2]/div[1]/div[1]/div/div[2]/input',
            '//*[@id="tsf"]/div[2]/div[1]/div[3]/center/input[1]',
            query,
        )
        if not submitted:
            return []
        # Scroll down before reading; presumably the related-keyword block
        # sits at the bottom of the result page.
        self._move_end()
        return self._reads('//*[@id="brs"]/g-section-with-header/div[2]/div/p[@class="nVcaUb"]')

    def searchNaverRelatedKeywords(self, query):
        """Return Naver's related-search keywords for *query*.

        Returns an empty list when the search box or button cannot be used,
        or when no related-keyword elements are found.
        """
        submitted = self._submit_query(
            'https://www.naver.com',
            '//*[@id="query"]',
            '//*[@id="search_btn"]',
            query,
        )
        if not submitted:
            return []
        self._move_end()
        return self._reads('//*[@id="nx_footer_related_keywords"]/div/div[2]/ul/li[@class="item"]/a/div[@class="tit"]')

    def searchDaumRelatedKeywords(self, query):
        """Return Daum's related-search keywords for *query*.

        Returns an empty list when the search box or button cannot be used,
        or when no related-keyword elements are found.
        """
        submitted = self._submit_query(
            'https://www.daum.net',
            '//*[@id="q"]',
            '//*[@id="daumSearch"]/fieldset/div/div/button[2]',
            query,
        )
        if not submitted:
            return []
        # Best effort: the result of this click is deliberately ignored.
        # Presumably it expands the keyword list when a "more" button exists.
        self._click('//*[@id="netizen_more_btn_bottom"]')
        return self._reads('//*[@id="netizen_lists_bottom"]/span[@class="wsn"]')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment