Skip to content

Instantly share code, notes, and snippets.

@arthurtalkgoal
Forked from irfani/selenium_spider.py
Created October 12, 2015 16:59
Show Gist options
  • Save arthurtalkgoal/d11eb3f6fde31af729c6 to your computer and use it in GitHub Desktop.
Save arthurtalkgoal/d11eb3f6fde31af729c6 to your computer and use it in GitHub Desktop.
Scrapyd with Selenium Spider
from selenium import selenium
from scrapy.spider import BaseSpider
from scrapy.http import Request
import time
import lxml.html
class SeleniumSprider(BaseSpider):
    """Scrapy spider that drives a browser through a Selenium RC server.

    The Scrapy response passed to ``parse`` is only used as a trigger; the
    page content is obtained from the Selenium-controlled browser instead.
    """

    name = "selenium"
    allowed_domains = ['selenium.com']
    start_urls = ["http://localhost"]

    def __init__(self, **kwargs):
        """Initialise the spider and start a Selenium RC browser session.

        Args:
            **kwargs: Spider arguments; forwarded unchanged to the base
                spider constructor.
        """
        # Let the base spider process its arguments; the original override
        # skipped this, so keyword arguments never reached the base class.
        super(SeleniumSprider, self).__init__(**kwargs)
        # Single-argument call form: same output under the Python 2 print
        # statement and the Python 3 print function.
        print(kwargs)
        # Arguments: RC server host, RC server port, browser launcher,
        # base URL against which relative ``open`` paths are resolved.
        self.sel = selenium("localhost", 4444, "*firefox", "http://selenium.com/")
        self.sel.start()

    def closed(self, reason):
        """Stop the Selenium session when the spider is closed.

        Without this the browser session started in ``__init__`` is never
        released.

        NOTE(review): relies on Scrapy invoking ``closed(reason)`` on spider
        shutdown -- confirm the installed Scrapy version supports this hook.

        Args:
            reason: Close reason supplied by Scrapy; not used here.
        """
        self.sel.stop()

    def parse(self, response):
        """Fill in and submit the form in the browser, then parse the page.

        Args:
            response: The Scrapy response for the start URL; it is not read.
        """
        sel = self.sel
        sel.open("/index.aspx")
        sel.click("id=radioButton1")
        sel.select("genderOpt", "value=male")
        sel.type("nameTxt", "irfani")
        sel.click("link=Submit")
        time.sleep(1)  # wait a second for page to load
        # Parse the browser's current HTML into an lxml tree. Nothing is
        # extracted or returned yet; item extraction from ``root`` is left
        # to be added here.
        root = lxml.html.fromstring(sel.get_html_source())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment