Skip to content

Instantly share code, notes, and snippets.

@luxu
Created January 18, 2019 22:39
Show Gist options
  • Save luxu/4c3ed799960c95acdbbcbd841623334e to your computer and use it in GitHub Desktop.
Save luxu/4c3ed799960c95acdbbcbd841623334e to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from requests import get
from bs4 import BeautifulSoup as bs
def links(url, timeout=30):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The ``BeautifulSoup`` object built from the response body with the
        built-in ``html.parser``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = get(url, timeout=timeout)
    # Fail loudly on HTTP errors rather than scraping an error page.
    response.raise_for_status()
    return bs(response.content, 'html.parser')
def parse(soup):
    """Print every product-name element found in ``soup``.

    The output is framed by a start banner and an end banner so that
    consecutive pages can be told apart on the console.

    Args:
        soup: Parsed page; must provide ``find_all(name, class_=...)``.

    Returns:
        The same ``soup`` object that was passed in, unchanged.
    """
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>PRÓXIMA RASPAGEM<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n')
    # A plain loop, not a comprehension: we only want the printing side
    # effect, not a throwaway list of ``None`` values.
    for product in soup.find_all('div', class_='nm-product-name'):
        print(product)
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>FIM DA RASPAGEM<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n')
    return soup
def _next_page_anchor(soup):
    """Return the ``<a>`` inside the "next page" pagination item, or ``None``.

    ``find`` returns ``None`` when the page has no
    ``li.neemu-pagination-next`` element, so the item is checked before its
    ``.a`` child is read.
    """
    item = soup.find('li', class_='neemu-pagination-next')
    if item is None:
        return None
    return item.a


if __name__ == '__main__':
    # Scrape the first result page, then follow the "next" links until the
    # pagination no longer offers one.
    url = "https://search3.pontofrio.com.br/busca?q=capacete+norisk"
    soup = links(url)
    parse(soup)
    prox = _next_page_anchor(soup)
    while prox is not None:
        # The href is prefixed with the scheme before fetching
        # (assumes a protocol-relative "//host/path" link - TODO confirm).
        soup = links(u'https:{}'.format(prox.attrs['href']))
        print(soup)  # debug output: dumps the whole fetched page
        soup = parse(soup)
        prox = _next_page_anchor(soup)
        print(prox)  # debug output: the next link, or None on the last page
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment