Skip to content

Instantly share code, notes, and snippets.

@burkeholland
Last active May 30, 2023 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save burkeholland/d001367a0dc047ef1c676124184d7455 to your computer and use it in GitHub Desktop.
Save burkeholland/d001367a0dc047ef1c676124184d7455 to your computer and use it in GitHub Desktop.
scraper
# write a web scraper that will scrape CSS Tricks and FreeCodeCamp for all articles written by author Burke Holland
import requests
from bs4 import BeautifulSoup
import csv
# Fetch the author's archive page from CSS-Tricks. The timeout keeps a stalled
# connection from hanging the script forever, and raise_for_status() stops the
# run on an HTTP error instead of parsing an error page as if it were the archive.
page = requests.get("https://css-tricks.com/author/burkeholland/", timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, "html.parser")

# Collect every div on the page that has the class "article-article";
# each one is treated as a single blog post.
posts = soup.find_all("div", class_="article-article")

# For each post, print its title followed by the text of the linked article.
for post in posts:
    # The title link is looked up as the first anchor inside the post's <h2>.
    h2 = post.find("h2")
    anchor = h2.find("a") if h2 is not None else None
    if anchor is None or not anchor.get("href"):
        # find() returns None when the markup does not match; skip this post
        # rather than crash with an AttributeError/KeyError.
        continue
    print(anchor.get_text())

    # Follow the title link to fetch the full article.
    articlePage = requests.get(anchor["href"], timeout=30)
    if not articlePage.ok:
        # One broken article link should not abort the whole run.
        continue
    articleSoup = BeautifulSoup(articlePage.text, "html.parser")

    # The article body is the first element with the class "article-content".
    articleBody = articleSoup.find(class_="article-content")
    if articleBody is not None:
        print(articleBody.get_text())
# articlePage = requests.get(post.find("a")["href"])
# articleSoup = BeautifulSoup(articlePage.text, "html.parser")
# # get the title
# title_element = articleSoup.find(class_="article-title")
# if title_element:
# title = title_element.get_text()
# else:
# title = ""
# print the title
# print(title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment