Skip to content

Instantly share code, notes, and snippets.

@dray89
Created August 29, 2020 23:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dray89/9c3942939e2721b25b8fbc7aaebad105 to your computer and use it in GitHub Desktop.
Save dray89/9c3942939e2721b25b8fbc7aaebad105 to your computer and use it in GitHub Desktop.
How to scrape tables using Scrapy
import scrapy
import pandas
from ..items import YahooItem
class YahooSpider(scrapy.Spider):
    """Spider that scrapes the price-history table for a fixed list of tickers.

    One request is issued per entry in ``symbols``; each response is turned
    into a single ``YahooItem`` holding the page title and the table contents.
    """

    name = 'Yahoo'

    # Ticker symbols to fetch; one history page is requested per symbol.
    symbols = [
        "ADSK", "BA", "CAT", "EBAY", "GS", "HSY", "IBM", "JPM", "WMT", "SHOP",
        "T", "F", "TRI", "AMZN", "C", "A", "O", "B", "MSFT", "NVDA",
        "DIS", "AAL", "NFLX", "JNJ", "BAC", "GOOGL", "WFC",
    ]
    start_urls = [
        'https://finance.yahoo.com/quote/{0}/history?p={0}'.format(x)
        for x in symbols
    ]

    def parse(self, response):
        """Yield one ``YahooItem`` built from the table found in *response*.

        The table is read row by row (one string per cell) rather than as a
        flat stream of text nodes, so a row with an unexpected number of cells
        cannot shift every following value into the wrong column.

        Yields:
            YahooItem with ``title`` (list of text nodes of ``<title>``) and
            ``data`` (``{column: {date: value}}`` as produced by
            ``DataFrame.to_dict()``). Nothing is yielded when no usable table
            is present in the response.
        """
        num_cols = 7  # header and every data row are expected to have 7 cells

        rows = []
        for row in response.xpath('//table//tr'):
            cells = [
                ''.join(cell.xpath('.//text()').extract()).strip()
                for cell in row.xpath('./th | ./td')
            ]
            # Skip rows that do not match the table shape (e.g. notice or
            # footnote rows spanning several columns) instead of letting
            # them misalign the data.
            if len(cells) == num_cols:
                rows.append(cells)

        # Need a header row plus at least one data row; otherwise the page
        # did not contain the expected table (blocked request, layout change).
        if len(rows) < 2 or 'Date' not in rows[0]:
            self.logger.warning('No price table found at %s', response.url)
            return

        header, records = rows[0], rows[1:]
        frame = pandas.DataFrame(records, columns=header)

        items = YahooItem()
        items['title'] = response.xpath('//title//text()').extract()
        items['data'] = frame.set_index('Date').to_dict()
        yield items
@rukshar69
Copy link

Can you please share the code for YahooItem?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment