# benekastah/basic_scraper.py

## basic_scraper.py
from bs4 import BeautifulSoup
import requests

def scrape_page(url):
    """Fetch a web page and print the ``href`` of every ``<a>`` tag on it.

    Args:
        url: Address of the page to fetch. A bare host or path such as
            ``"example.com"`` gets ``http://`` prepended; a URL that already
            starts with ``http://`` or ``https://`` is used as given.
    """
    # Only add a scheme when the caller did not supply one. Prepending it
    # unconditionally turned "http://example.com" into
    # "http://http://example.com".
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url

    response = requests.get(url)
    # Name the parser explicitly so the parse result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    for link in soup.find_all('a'):
        print(link.get('href'))
        # To crawl further, do the next line for pages you want to scrape:
        # scrape_page(link.get('href'))
        # DON'T DO THIS FOR ALL LINKS. It could take you anywhere on the internet.
        # Check that the URL is a page you are interested in first.

# Demo run. Pass the bare host: scrape_page() supplies the "http://" scheme,
# so including it here would request "http://http://example.com".
scrape_page("example.com")
	from bs4 import BeautifulSoup
	import requests

	def scrape_page(url):
	    """Fetch a web page and print the ``href`` of every ``<a>`` tag on it.

	    Args:
	        url: Address of the page to fetch. A bare host or path such as
	            ``"example.com"`` gets ``http://`` prepended; a URL that
	            already starts with ``http://`` or ``https://`` is used as
	            given.
	    """
	    # Only add a scheme when the caller did not supply one. Prepending it
	    # unconditionally turned "http://example.com" into
	    # "http://http://example.com".
	    if not url.lower().startswith(("http://", "https://")):
	        url = "http://" + url

	    response = requests.get(url)
	    # Name the parser explicitly so the parse result does not depend on
	    # which optional parsers happen to be installed.
	    soup = BeautifulSoup(response.text, "html.parser")

	    for link in soup.find_all('a'):
	        print(link.get('href'))
	        # To crawl further, do the next line for pages you want to scrape:
	        # scrape_page(link.get('href'))
	        # DON'T DO THIS FOR ALL LINKS. It could take you anywhere on the internet.
	        # Check that the URL is a page you are interested in first.

	# Demo run. Pass the bare host: scrape_page() supplies the "http://" scheme,
	# so including it here would request "http://http://example.com".
	scrape_page("example.com")