Skip to content

Instantly share code, notes, and snippets.

@adalenv
Created May 22, 2018 15:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adalenv/4c05a2186af7931f523af83390bae79d to your computer and use it in GitHub Desktop.
Save adalenv/4c05a2186af7931f523af83390bae79d to your computer and use it in GitHub Desktop.
Phone Number Scraper
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-05-18 09:33:05
# Project: test1
from pyspider.libs.base_handler import *
class Handler(BaseHandler):
    """pyspider handler that collects "name,phone" rows from paginated listings.

    Flow: ``on_start`` requests the category landing page, ``index_page``
    queues every numbered listing page, and ``detail_page`` extracts one
    comma-separated line per ``.groupTitle`` entry found on a listing page.
    """

    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the category landing page.

        Decorated with ``@every(minutes=24 * 60)``, i.e. a 24-hour interval.
        """
        self.crawl('https://website/category', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue the numbered listing pages 1 through 99998 for extraction.

        ``response`` is not inspected; the page numbers are generated
        blindly rather than discovered from the landing page.
        """
        # NOTE(review): the URL appends '&page=' with no preceding '?';
        # presumably the real category URL already carries a query string --
        # confirm before changing it.
        # NOTE(review): the upper bound is a hard-coded guess, so pages past
        # the real last page are still requested -- confirm this is intended.
        for page in range(1, 99999):
            self.crawl('https://website/category&page=' + str(page),
                       callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract one "name,phone" line per listing entry on the page.

        Returns:
            A dict with a single key ``"data"`` whose value is the
            concatenation of ``<name>,<phone>\\n`` for every
            ``.groupTitle`` element under ``.contentContainer > div``.
            The value is an empty string when no entries match.
        """
        entries = response.doc('.contentContainer > div > .groupTitle').items()
        # Build all rows first and join once instead of growing a string in
        # the loop. Plain '+' concatenation is kept (rather than str.format)
        # so unicode text is handled exactly as before on Python 2.
        rows = [
            entry('.groupTitle > h2').text()
            + ','
            + entry('.groupTitle +.searchPhone').text()
            + '\n'
            for entry in entries
        ]
        return {
            "data": ''.join(rows),
        }
@adalenv
Copy link
Author

adalenv commented May 22, 2018

telexpl

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment