Skip to content

Instantly share code, notes, and snippets.

@AnderRV
Created August 20, 2021 10:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AnderRV/a1a33d102e43893f278b379dd17e104c to your computer and use it in GitHub Desktop.
Save AnderRV/a1a33d102e43893f278b379dd17e104c to your computer and use it in GitHub Desktop.
import json
import defaults
import repo
def _text_or_none(node):
    """Return ``node.text``, or ``None`` when the lookup found nothing."""
    return node.text if node is not None else None


def extract_content(url, soup):
    """Collect id, name and price for every ``.product`` element in *soup*.

    Args:
        url: Address of the page being processed. Not used by this
            function; kept so the signature stays unchanged for callers.
        soup: Parsed document exposing ``select()`` whose results expose
            ``find()`` (presumably a BeautifulSoup object -- confirm
            against the caller).

    Returns:
        A list with one dict per product, with keys ``'id'``, ``'name'``
        and ``'price'``. A value is ``None`` when the corresponding element
        is missing from the product markup, so a single malformed product
        no longer aborts extraction for the whole page.
    """
    items = []
    for product in soup.select('.product'):
        # The product id lives in the data attribute of the first anchor
        # that carries one.
        anchor = product.find('a', attrs={'data-product_id': True})
        items.append({
            'id': anchor['data-product_id'] if anchor is not None else None,
            'name': _text_or_none(product.find('h2')),
            'price': _text_or_none(product.find(class_='amount')),
        })
    return items
def store_content(url, content):
    """Persist each extracted item that has an id via ``repo.set_content``.

    Args:
        url: Address of the page the items came from. Not used by this
            function; kept so the signature stays unchanged for callers.
        content: Iterable of item dicts, as produced by
            ``extract_content``.

    Items whose ``'id'`` is missing or falsy are skipped, since there is
    no key to store them under. Every other item is serialized with
    ``json.dumps`` and handed to ``repo.set_content`` keyed by its id.
    """
    for item in content:
        item_id = item.get('id')
        if not item_id:
            continue
        repo.set_content(item_id, json.dumps(item))
def allow_url_filter(url):
    """Tell whether *url* is accepted by the filter.

    Args:
        url: The address to check, as a string.

    Returns:
        ``True`` when *url* contains ``'/shop/page/'`` and contains no
        ``'#'`` character; ``False`` otherwise.
    """
    return '/shop/page/' in url and '#' not in url
def get_html(url):
    """Return whatever ``defaults.get_html`` produces for *url*.

    This is a thin pass-through to the ``defaults`` module: the argument
    is forwarded unchanged and the result is returned unchanged.

    Args:
        url: Address passed straight to ``defaults.get_html``.

    Returns:
        The value returned by ``defaults.get_html(url)``.
    """
    return defaults.get_html(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment