Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
example
class DmozSpider(BaseSpider):
    """Spider that collects the ``href`` of every anchor in a response.

    Each response handed to :meth:`parse` is queried with the XPath
    ``//a/@href``; every match is passed to ``getLinks`` and wrapped in a
    ``BlogscrapeItem`` whose ``'link'`` field holds the extracted string.
    """

    # A preceding ``name = "twitter.com"`` assignment was overwritten by this
    # one before it could ever be read, so only the effective value is kept.
    name = "dmoz"
    allowed_domains = ["codinginmysleep.com"]
    start_urls = [
        "http://codinginmysleep.com",
    ]

    def parse(self, response):
        """Build one ``BlogscrapeItem`` per ``<a href>`` found in *response*.

        :param response: the downloaded page, wrapped in an
            ``HtmlXPathSelector`` for querying.
        :returns: a list of ``BlogscrapeItem`` objects, one for each
            ``//a/@href`` match, in selector order.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        for link in hxs.select("//a/@href"):
            text = link.extract()
            # NOTE(review): getLinks is defined outside this block and its
            # return value is discarded here, so it is presumably called for
            # a side effect -- confirm against its definition.
            getLinks(text)
            item = BlogscrapeItem()
            item['link'] = text
            items.append(item)
        return items
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.