Skip to content

Instantly share code, notes, and snippets.

@alex4hoang
Last active May 13, 2021 19:21
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save alex4hoang/9a7ae9813311ec1c6f7c0b0ca622d006 to your computer and use it in GitHub Desktop.
Save alex4hoang/9a7ae9813311ec1c6f7c0b0ca622d006 to your computer and use it in GitHub Desktop.
#!/bin/python
# -*- coding: utf-8 -*-
from time import sleep
from random import randint
from selenium import webdriver
from pyvirtualdisplay import Display
class MuncherySpider:
    """Selenium-driven scraper that collects menu items from munchery.com.

    Typical use is a single call to :meth:`parse`, which starts the browser,
    loads the site, submits the front-gate form, scrapes the menu list and
    shuts everything down again.
    """

    def __init__(self):
        self.url_to_crawl = "https://munchery.com/"
        # Accumulates one dict per successfully scraped menu item.
        self.all_items = []
        # Handles created by start_driver(); None while nothing is running.
        self.driver = None
        self.display = None

    def start_driver(self):
        """Start a virtual display and open chromedriver inside it."""
        print('starting driver...')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Chrome("/var/chromedriver/chromedriver")
        sleep(4)

    def close_driver(self):
        """Quit chromedriver and stop the virtual display.

        The browser is shut down before the display it runs on. Handles that
        were never created (for example when start-up failed half way) are
        skipped, so this method is safe to call from a ``finally`` block and
        safe to call more than once.
        """
        print('closing driver...')
        driver = getattr(self, 'driver', None)
        display = getattr(self, 'display', None)
        try:
            if driver is not None:
                driver.quit()
        finally:
            # Stop the display even if quitting the browser raised.
            if display is not None:
                display.stop()
            self.driver = None
            self.display = None
        print('closed!')

    def get_page(self, url):
        """Tell the browser to load *url*, then pause for 2-3 seconds."""
        print('getting page...')
        self.driver.get(url)
        sleep(randint(2, 3))

    def login(self):
        """Fill in and submit the Munchery front-gate form (best effort).

        Any failure (for instance the form not being present on the page) is
        reported on stdout and otherwise ignored, so scraping can continue.
        """
        print('getting pass the gate page...')
        try:
            form = self.driver.find_element_by_xpath('//*[@class="signup-login-form"]')
            form.find_element_by_xpath('.//*[@class="user-input email"]').send_keys('iam@alexhoang.net')
            form.find_element_by_xpath('.//*[@class="user-input zip-code"]').send_keys('94011')
            form.find_element_by_xpath('.//*[@class="large orange button"]').click()
            sleep(randint(3, 5))
        except Exception as exc:
            # Deliberately non-fatal, but no longer silent.
            print('gate page not handled: %r' % (exc,))

    def grab_list_items(self):
        """Scrape every ``<li>`` of the menu list into ``self.all_items``.

        Entries for which :meth:`process_elements` yields nothing are skipped.
        """
        print('grabbing list of items...')
        for div in self.driver.find_elements_by_xpath('//ul[@class="menu-items row"]//li'):
            data = self.process_elements(div)
            if data:
                self.all_items.append(data)

    def process_elements(self, div):
        """Extract image, title and price from one menu entry element.

        Returns a dict with the keys ``'image'``, ``'title'`` and ``'price'``
        (each value UTF-8 encoded), or ``False`` when any of the three pieces
        is missing, empty, or could not be located.
        """
        try:
            # NOTE(review): the attribute read here is "source", not "src" -
            # confirm against the page markup that this is intended.
            prd_image = div.find_element_by_xpath('.//*[@class="photo item-photo"]').get_attribute("source")
            prd_title = div.find_element_by_xpath('.//*[@class="text ng-binding"]').text
            prd_price = div.find_element_by_xpath('.//*[@class="price ng-scope ng-binding"]').text
        except Exception:
            # A failed lookup means the entry is incomplete; skip it.
            return False
        if not (prd_image and prd_title and prd_price):
            return False
        return {
            'image': prd_image.encode('UTF-8'),
            'title': prd_title.encode('UTF-8'),
            'price': prd_price.encode('UTF-8'),
        }

    def parse(self):
        """Run the full crawl and return the scraped items.

        Returns ``self.all_items`` when at least one item was collected,
        otherwise the tuple ``(False, False)`` (kept for backward
        compatibility with existing callers).

        The driver and display are always closed, even if a step raises; the
        exception then propagates to the caller after clean-up.
        """
        try:
            self.start_driver()
            self.get_page(self.url_to_crawl)
            self.login()
            self.grab_list_items()
        finally:
            self.close_driver()
        if self.all_items:
            return self.all_items
        return False, False
# Run spider
Munchery = MuncherySpider()
items_list = Munchery.parse()
# Do something with the data touched.
# parse() returns the tuple (False, False) when nothing was scraped; skip
# those falsy placeholders instead of printing "False" twice.
for item in items_list:
    if item:
        print(item)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment