Skip to content

Instantly share code, notes, and snippets.

@noczero
Created July 13, 2024 06:17
Show Gist options
  • Save noczero/429bc3313a421b65fb6558414a3f4a27 to your computer and use it in GitHub Desktop.
Save noczero/429bc3313a421b65fb6558414a3f4a27 to your computer and use it in GitHub Desktop.
Scraping websites using Jina AI reader.
import requests
from typing import Optional
def scrape_website(url: str, timeout: float = 30.0) -> Optional[str]:
    """Fetch a page through the Jina AI reader endpoint.

    The target address is appended to ``https://r.jina.ai/`` and requested
    with a single HTTP GET.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the reader endpoint before giving up.

    Returns:
        The response body as text when the reader answers with HTTP 200.
        ``None`` when the status code is anything else or when the request
        itself fails (connection error, timeout, ...); a short diagnostic
        is printed in both failure cases.
    """
    # Keep the caller's URL intact instead of rebinding the parameter.
    reader_url = f"https://r.jina.ai/{url}"

    try:
        # requests applies no timeout by default, so a stalled server would
        # otherwise block this call indefinitely.
        response = requests.get(reader_url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return None

    return response.text
if __name__ == "__main__":
    # Add more URLs to scrape here.
    urls = [
        'https://blog.zeroinside.id',
        'https://zeroinside.id',
    ]

    for url in urls:
        data = scrape_website(url)
        if data:
            # Plain {url} interpolation: a leading '#' inside an f-string is
            # emitted literally and would end up in front of the address.
            print(f"Successfully scraped data from {url}.")
            # You can process or save `data` here as needed.
            print(data)
        else:
            print("Failed to scrape data.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment