# burkeholland/scraper.py

## scraper.py
# Web scraper for articles written by author Burke Holland. The stated targets are
# CSS-Tricks and freeCodeCamp; the code visible here only scrapes the CSS-Tricks author page.

import csv
import sys

import requests
from bs4 import BeautifulSoup

# Scrape the CSS-Tricks author page for Burke Holland and print the title and
# body text of every article listed on it.

AUTHOR_URL = "https://css-tricks.com/author/burkeholland/"

# Seconds to wait for a response; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def fetch_soup(url):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on error pages instead of parsing them as if they were content.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def find_post_links(soup):
    """Return ``(title, href)`` pairs for the posts listed in *soup*.

    A post is a ``div`` with class ``article-article`` whose ``h2`` heading
    contains a link. Entries missing the heading, the link, or the link's
    ``href`` are skipped rather than raising on a ``None`` lookup.

    Args:
        soup: Parsed listing page (anything exposing ``find_all``).

    Returns:
        A list of ``(title, href)`` tuples in page order.
    """
    links = []
    for post in soup.find_all("div", class_="article-article"):
        heading = post.find("h2")
        anchor = heading.find("a") if heading is not None else None
        href = anchor.get("href") if anchor is not None else None
        if not href:
            continue
        links.append((anchor.get_text(), href))
    return links


def main():
    """Print the title and body text of each article on the author page."""
    for title, href in find_post_links(fetch_soup(AUTHOR_URL)):
        print(title)

        try:
            article_soup = fetch_soup(href)
        except requests.RequestException as err:
            # One unreachable article should not abort the whole run.
            print("could not fetch", href, "-", err, file=sys.stderr)
            continue

        # The article text lives in the element with class "article-content".
        article_body = article_soup.find(class_="article-content")
        if article_body is None:
            print("no article content found at", href, file=sys.stderr)
            continue

        print(article_body.get_text())


if __name__ == "__main__":
    main()