Skip to content

Instantly share code, notes, and snippets.

@rafikahmed
Created January 10, 2020 09:02
Show Gist options
  • Save rafikahmed/52ede7dade79420bdc91ead2bbbc0a06 to your computer and use it in GitHub Desktop.
Save rafikahmed/52ede7dade79420bdc91ead2bbbc0a06 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
class BlockchainSpider(scrapy.Spider):
    """Spider that scrapes block rows from the blockchain.com explorer page.

    Every item it yields is a dict with two keys, ``height`` and ``hash``,
    taken from the link text found in each row of the blocks table.
    """

    name = 'blockchain'
    allowed_domains = ['www.blockchain.com']
    start_urls = ['https://www.blockchain.com/explorer']

    def parse(self, response):
        """Yield one ``{'height': ..., 'hash': ...}`` dict per table row.

        Args:
            response: The downloaded page handed over by Scrapy for a
                URL in ``start_urls``.

        Yields:
            dict: The text of the first matching link for each field.
            NOTE(review): ``.get()`` is expected to give ``None`` when the
            XPath matches nothing -- confirm against the Scrapy selector
            documentation.
        """
        # The first two child divs are the table headers, so the data
        # rows start at position 3.
        rows = response.xpath("//div[@class='sc-1kj8up-0 cNSdQW']/div[position() >= 3]")
        for row in rows:
            height = row.xpath(".//div[1]/div[2]/a/text()").get()
            block_hash = row.xpath(".//div[2]/div[2]/a/text()").get()
            yield {'height': height, 'hash': block_hash}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment