Skip to content

Instantly share code, notes, and snippets.

@sharan-naribole
Last active January 5, 2017 21:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sharan-naribole/f9598068a4e4cdea9dc500e164473675 to your computer and use it in GitHub Desktop.
Save sharan-naribole/f9598068a4e4cdea9dc500e164473675 to your computer and use it in GitHub Desktop.
def parse(self, response):
    """Yield a follow-up request for each submission with enough comments.

    Iterates over every element matching the ``.comments`` CSS class in
    ``response``, reads the comment count from the link text, and, for
    submissions whose count is at least ``self.Ncomments_lower``, yields a
    ``scrapy.Request`` for the comments page handled by
    ``self.parse_submission``.

    Side effects: increments ``self.submission_count`` once per matched
    element and logs progress through ``self.logger``.

    Args:
        response: the listing-page response being parsed.

    Yields:
        scrapy.Request: one request per qualifying submission, carrying a
        partially filled ``FlairsItem`` (``comments`` and ``link``) in
        ``request.meta['item']``.
    """
    self.logger.info("Visited %s", response.url)
    for submission in response.css('.comments'):
        self.submission_count += 1
        self.logger.info("Submission Count = %s", self.submission_count)

        # extract_first() returns None when the link has no text node;
        # treat that like an empty label instead of crashing on .split().
        label = submission.css('a::text').extract_first()
        tokens = label.split() if label else []

        if len(tokens) <= 1:
            # A label without a leading number (a single word, or nothing
            # at all) is counted as zero comments.
            n_comments = 0
        else:
            try:
                n_comments = int(tokens[0])
            except ValueError:
                # Count is unknown: skip this entry rather than abort the
                # whole page.
                self.logger.warning(
                    "Skipping submission with unparseable comment label %r",
                    label,
                )
                continue

        # Include only submissions at or above the lower limit.
        if n_comments < self.Ncomments_lower:
            continue

        href = submission.css('a::attr(href)').extract_first()
        if href is None:
            # Without a link there is nothing to request.
            self.logger.warning(
                "Skipping submission without a comments link on %s",
                response.url,
            )
            continue

        # Ask for the top-sorted comments, capped at self.Ncomments_upper.
        comments_href = href + "?sort=top&limit=" + str(self.Ncomments_upper)
        link = response.urljoin(comments_href)

        item = FlairsItem()
        item['comments'] = n_comments
        item['link'] = link

        request = scrapy.Request(link, callback=self.parse_submission)
        request.meta['item'] = item
        yield request
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment