Created
February 14, 2019 17:50
-
-
Save cgimenes/a360e41857a1cf918bf4c1ff29d95841 to your computer and use it in GitHub Desktop.
Simple Python web scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup

# Walk the paginated listing and collect the `href` of every element that
# matches the `.object` CSS selector, then print everything collected.
page = 0
urls = []
while True:
    print(f"Page: {page}")
    # A timeout keeps the script from hanging forever on a stalled connection.
    site = requests.get(f"https://pudim.com.br/?page={page}", timeout=30)
    # Compare by value: `is 200` tests object identity, which only works by
    # accident of CPython's small-int caching and emits a SyntaxWarning.
    if site.status_code != 200:
        # Without this break a failing page would be re-requested forever,
        # because `page` is only advanced on a successful response.
        break
    content = BeautifulSoup(site.content, 'html.parser')
    # `element` avoids shadowing the builtin `object`; the original read the
    # href from an undefined name (`button`) and raised NameError.
    for element in content.select('.object'):
        urls.append(element.get('href'))
    # NOTE(review): the loop continues only while a `.page-previous` link is
    # present. If the site marks the forward link differently (e.g.
    # `.page-next`), this selector stops the crawl too early -- confirm
    # against the page markup.
    if not content.select('.pagination .page-previous'):
        break
    page += 1

print('Results: ')
for url in urls:
    # Print each collected URL (the original printed the stale loop variable).
    print(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment