Skip to content

Instantly share code, notes, and snippets.

Created November 10, 2014 09:27
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy.http import FormRequest
from scrapy.selector import Selector
from spider.spiders.basic import StudentSpider
from spider.items import StudentItem
from scrapy import log
class XPATHS:
    """XPath expressions used by SamSpider.people to read the results page."""

    # Absolute expression, evaluated against the whole response.
    # NOTE(review): "tbody" is frequently inserted by browsers and absent from
    # the raw HTML a crawler receives -- confirm against the real response.
    # NOTE(review): "tr[3]" matches only the third row of each table even
    # though the name says ROWS -- confirm this is intended.
    STUDENTS_ROWS = '//*[@id="results"]/center/table/tbody/tr[3]'

    # Relative expressions ("./..."), evaluated against each node matched by
    # STUDENTS_ROWS. Each one reads the "value" attribute of a form input.
    STUDENT_NAME = './td/form/input[@name="submit"]/@value'
    STUDENT_EMAIL = './td/form/input[@id="email"]/@value'
    STUDENT_STATUS = './td/form/input[@id="rolestring"]/@value'
class SamSpider(StudentSpider):
    """Spider that submits one search form per phrase and yields StudentItems.

    Progress is tracked in ``self.state`` under the keys ``progress_current``
    (searches whose results have been parsed) and ``progress_total`` (number
    of search phrases).
    """

    name = 'sam'
    # NOTE(review): the start URL is an empty string in this copy of the
    # file; it has to point at the page containing the search form before
    # the spider can run.
    start_urls = ['']

    def __init__(self, *args, **kwargs):
        """Store the optional ``filter_role`` argument and hook spider_idle."""
        super(SamSpider, self).__init__(*args, **kwargs)
        self.filter_role = kwargs.get('filter_role', '')
        dispatcher.connect(self.idle, signals.spider_idle)

    def idle(self, spider):
        """Mark progress as complete once this spider has nothing left to do.

        :param spider: the spider that went idle, as sent with the signal.
        """
        # The handler is connected globally, so ignore idle notifications
        # that belong to a different spider.
        if spider is not self:
            return
        if "progress_total" in self.state:
            self.state['progress_current'] = self.state['progress_total']

    def parse(self, response):
        """Reset the progress counters and yield one search request per phrase.

        ``get_search_phrases`` is not defined in this class; presumably it is
        inherited from StudentSpider.
        """
        phrases = self.get_search_phrases()
        self.state['progress_current'] = 0
        self.state['progress_total'] = len(phrases)
        for phrase in phrases:
            yield self.get_search_request(response, phrase)

    def get_search_request(self, response, phrase):
        """Build the form submission that searches for ``phrase``.

        :param response: response containing the form with id "search".
        :param phrase: value submitted in the ``searchChar`` form field.
        :returns: a FormRequest pre-filled from the form in ``response``.
        """
        # NOTE(review): the original call was truncated. ``formdata`` wraps
        # the surviving 'searchChar' entry, and ``callback=self.people`` is a
        # reconstruction (``people`` is the only response handler here that
        # nothing else references) -- confirm against the original source.
        return FormRequest.from_response(
            response,
            formxpath='//*[@id="search"]',
            formdata={'searchChar': phrase},
            callback=self.people
        )

    @staticmethod
    def _first_stripped(values, default=''):
        """Return the first element of ``values`` stripped, or ``default``."""
        return values[0].strip() if values else default

    def people(self, response):
        """Count one finished search and yield a StudentItem per matched row."""
        self.state['progress_current'] += 1
        sel = Selector(response)
        for student in sel.xpath(XPATHS.STUDENTS_ROWS):
            name = student.xpath(XPATHS.STUDENT_NAME).extract()
            email = student.xpath(XPATHS.STUDENT_EMAIL).extract()
            status = student.xpath(XPATHS.STUDENT_STATUS).extract()
            # A missing field becomes an empty string rather than an error.
            yield StudentItem(
                name=self._first_stripped(name),
                email=self._first_stripped(email),
                enrollment=self._first_stripped(status)
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment