Skip to content

Instantly share code, notes, and snippets.

@ehzawad
Created January 29, 2024 09:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ehzawad/75c08e47c4504e9e51fefe56eb34d555 to your computer and use it in GitHub Desktop.
Save ehzawad/75c08e47c4504e9e51fefe56eb34d555 to your computer and use it in GitHub Desktop.
Web Scraping from a simple website
import requests
from bs4 import BeautifulSoup
def scrape_page(url):
    """Fetch one FAQ page and extract its question/answer pairs.

    Every ``div.accordion-item`` in the page is searched for a
    ``button.accordion-button`` (the question) and a ``div.accordion-body``
    (the answer). Items missing either element are skipped so that a single
    malformed entry does not discard the rest of the page.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``{'Question': ..., 'Answer': ...}`` dicts (possibly
        empty), or ``None`` when downloading or parsing failed. Failures are
        reported on stdout instead of being raised.
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        faqs = []
        for item in soup.find_all('div', class_='accordion-item'):
            question = item.find('button', class_='accordion-button')
            answer = item.find('div', class_='accordion-body')
            if question is None or answer is None:
                # find() returns None when the element is absent; skip the
                # incomplete item rather than failing the whole page.
                continue
            faqs.append({
                'Question': question.text.strip(),
                'Answer': answer.text.strip(),
            })
        return faqs
    except requests.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        # Deliberately broad: scraping is best-effort and the caller only
        # checks whether a usable result came back.
        print(f"An error occurred: {err}")
    return None
def save_to_file(faqs, filename):
    """Append FAQ entries to a UTF-8 text file.

    Each entry is written as a ``Question:`` line, an ``Answer:`` line and a
    blank separator line. The file is opened in append mode, so repeated
    calls accumulate entries and an empty *faqs* still creates the file.

    Args:
        faqs: Iterable of dicts with ``'Question'`` and ``'Answer'`` keys.
        filename: Path of the file to append to.
    """
    with open(filename, 'a', encoding='utf-8') as file:
        # One batched write instead of three small writes per entry.
        file.writelines(
            f"Question: {faq['Question']}\nAnswer: {faq['Answer']}\n\n"
            for faq in faqs
        )
# Example usage: walk every numbered FAQ page and append its entries to one file.
base_url = 'https://eporcha.gov.bd/faq?page='
pages = 6  # Total number of pages
for page in range(1, pages + 1):
    url = f"{base_url}{page}"
    faqs = scrape_page(url)
    # Skip pages that produced nothing usable (None or an empty list).
    if faqs:
        save_to_file(faqs, 'faqs.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment