Created
June 21, 2020 19:48
-
-
Save hamletbatista/d08b0d5ac5139911be0d189789e94970 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle

import scrapy
class ShopifyThemeLinkSpider(scrapy.Spider):
    """Crawl the theme pages listed in ``theme_links.pkl`` and emit their demo URL.

    ``theme_links.pkl`` is expected to hold a pickled mapping whose keys are the
    page URLs to crawl and whose values are mappings containing a ``"theme"``
    entry (that is how the data is read below).

    Each crawled page produces at most one item with the keys ``"demo-url"``,
    ``"link"`` and ``"theme"``.
    """

    name = 'shopifyspider'

    # The mapping is loaded once, when the class body is executed, from the
    # current working directory.
    # SECURITY NOTE: unpickling can execute arbitrary code. Only run this
    # spider against a ``theme_links.pkl`` that you produced yourself.
    with open('theme_links.pkl', 'rb') as f:
        theme_links = pickle.load(f)

    # A real list (rather than a ``dict_keys`` view) is what Scrapy documents
    # for ``start_urls``.
    start_urls = list(theme_links)

    def parse(self, response):
        """Yield one item describing the first theme preview link on the page.

        Args:
            response: the Scrapy response for one of the ``start_urls``.

        Yields:
            dict: ``{"demo-url": ..., "link": ..., "theme": ...}``. ``"demo-url"``
            is ``None`` when the link carries no ``data-demo-url`` attribute.
            Nothing is yielded when the page has no matching link.

        Raises:
            KeyError: if the requested URL is not a key of ``theme_links``.
        """
        # If the request was redirected, ``response.url`` is the final URL and
        # may not be a key of ``theme_links``. The redirect middleware records
        # the chain of requested URLs in ``redirect_urls``; its first entry is
        # the URL that was originally scheduled.
        request = getattr(response, "request", None)
        redirect_chain = request.meta.get("redirect_urls") if request is not None else None
        requested_url = redirect_chain[0] if redirect_chain else response.url

        # The parentheses matter: ``//a[...][1]`` would select every anchor that
        # is the first match *within its own parent*, whereas ``(//a[...])[1]``
        # selects the first match in the whole document.
        first_preview_link = response.xpath(
            "(//a[contains(@class, 'theme-preview-link')])[1]"
        )

        for theme in first_preview_link:  # zero or one element
            # The attribute presumably holds a scheme-less host/path, hence the
            # "https://" prefix -- verify against the crawled pages.
            demo_target = theme.css('::attr(data-demo-url)').get()
            yield {
                # Avoid emitting the bogus string "https://None".
                "demo-url": f"https://{demo_target}" if demo_target is not None else None,
                "link": response.url,  # crawled page
                "theme": self.theme_links[requested_url]["theme"],  # theme from pickled file
            }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To try: https://twitter.com/ZeNobral/status/1308529080694964226?s=20