Skip to content

Instantly share code, notes, and snippets.

@alirezamika
alirezamika / autoscraper-examples.md
Last active May 15, 2024 03:29
AutoScraper Examples

Grouping results and removing unwanted ones

Here we want to scrape the product name, price and rating from eBay product pages:

# Example eBay product page that is passed to build().
url = 'https://www.ebay.com/itm/Sony-PlayStation-4-PS4-Pro-1TB-4K-Console-Black/203084236670' 

# Sample values for the three fields of interest: product name, price, rating.
wanted_list = ['Sony PlayStation 4 PS4 Pro 1TB 4K Console - Black', 'US $349.99', '4.8'] 

# NOTE(review): `scraper` is not created in this excerpt — presumably an
# AutoScraper instance defined earlier; confirm before running this snippet.
scraper.build(url, wanted_list)
from collections import defaultdict
class Q:
def __init__(self, alpha=0.5, discount=0.5):
self.alpha = alpha
self.discount = discount
self.values = defaultdict(lambda: defaultdict(lambda: 0.0))
def update(self, state, action, next_state, reward):
import random
from q import Q
from tictactoe import TicTacToe
class Agent:
def __init__(self):
self.eps = 1.0
self.qlearner = Q()
from agent import Agent
from tictactoe import TicTacToe
def play(agent):
game = TicTacToe()
while True:
action = agent.qlearner.get_best_action(game.get_state())
winner = game.play(*action)
if winner:
class TicTacToe:
def __init__(self, render=True):
self.board = [[0, 0, 0] for _ in range(3)]
self.player = 1
self.repr = {0: ".", 1: "x", -1: "o"}
self.render = render
def _get_winner(self):
# check horizontal
for i in range(3):
from autoscraper import AutoScraper
from flask import Flask, request
# One AutoScraper instance per site.
ebay_scraper = AutoScraper()
etsy_scraper = AutoScraper()
# Load each scraper by name. 'ebay-search' and 'etsy-search' are the same
# names passed to scraper.save() in the other snippets of this file, so those
# snippets presumably have to be run first — confirm.
ebay_scraper.load('ebay-search')
etsy_scraper.load('etsy-search')
# Flask application object; no routes are registered in this excerpt.
app = Flask(__name__)
# Restrict the scraper to the four listed rule ids, then save it under the
# name 'etsy-search'.
# NOTE(review): the rule ids look auto-generated; presumably they only match a
# scraper built from the same page and samples — confirm before reusing them.
scraper.keep_rules(['rule_705x', 'rule_70m8', 'rule_d9wp', 'rule_kv6p'])
scraper.save('etsy-search')
# Etsy search results page for the query "macbook".
url = 'https://www.etsy.com/search?q=macbook'
# Sample values grouped by field name ('title', 'price', 'url').
wanted_dict = {
# Two example listing titles.
'title': [
'Apple MacBook Pro i9 32GB 500GB Radeon 560X 15.4 2018 Touch Bar 2.9GHz 6-Core',
'Laptop MacBook Premium Ergonomic Wood Stand Holder Computer Gift Nerd Tech Geek Mens, woodworking gift, Home office workspace accessories',
],
# Prices are given as strings without a currency symbol.
'price': ['1,500.00', '126.65'],
# NOTE(review): presumably a listing id that appears inside the listing URL
# rather than a full URL — confirm how it is matched.
'url': ['851553172']
}
# Give the three rule ids the readable aliases 'title', 'price' and 'url'.
scraper.set_rule_aliases({'rule_0aok': 'title', 'rule_vn5z': 'price', 'rule_buz1': 'url'})
# Restrict the scraper to those same three rules.
scraper.keep_rules(['rule_0aok', 'rule_vn5z', 'rule_buz1'])
# Save the scraper under the name 'ebay-search'.
scraper.save('ebay-search')
from autoscraper import AutoScraper
# eBay search URL with the keyword parameter `_nkw=iphone`.
url = 'https://www.ebay.com/sch/i.html?_nkw=iphone'
# Sample values: a listing title, a price and a full item URL.
wanted_list = ['Apple iPhone X 64GB Factory Unlocked Smartphone', '$389.99', 'https://www.ebay.com/itm/Apple-iPhone-X-64GB-Factory-Unlocked-Smartphone/254187579586?epid=238944741&hash=item3b2ec2a8c2:g:ZPQAAOSwD6VdpL~9']
scraper = AutoScraper()
# Build the scraper from the page and the samples; the return value is kept in
# `result` (presumably the items matched on the page — see the AutoScraper docs).
result = scraper.build(url=url, wanted_list=wanted_list)