Created
May 13, 2017 10:44
-
-
Save cell13/ab309c0943d362a8a5c9e3e510c8e529 to your computer and use it in GitHub Desktop.
python_tools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print every absolute http:// link found on a web page (demo).
#
# Requires the third-party beautifulsoup4 package; install it first with:
#     pip install beautifulsoup4
import re
import urllib.request

from bs4 import BeautifulSoup

# Open the page inside a context manager so the HTTP connection is
# closed even if parsing raises.  BeautifulSoup reads the response
# eagerly, so using `soup` after the `with` block is safe.
with urllib.request.urlopen("http://arstechnica.com") as html_page:
    soup = BeautifulSoup(html_page, "html.parser")

# The raw-string regex keeps only absolute links with an http:// scheme
# (https:// and site-relative hrefs are deliberately excluded, as in
# the original demo).
for link in soup.find_all("a", attrs={"href": re.compile(r"^http://")}):
    print(link.get("href"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract absolute http:// links from a web page (demo).
#
# Requires the third-party beautifulsoup4 package; install it first with:
#     pip install beautifulsoup4
import re
import urllib.request

from bs4 import BeautifulSoup

# Compiled once at module level so repeated getLinks() calls reuse it.
# Matches only absolute hrefs with an http:// scheme, as in the demo.
_HTTP_LINK = re.compile(r"^http://")


def getLinks(url):
    """Return a list of absolute ``http://`` hrefs found at *url*.

    Fetches *url* over HTTP, parses the response body with
    BeautifulSoup, and collects the ``href`` attribute of every
    ``<a>`` tag whose href starts with ``http://``.

    Raises ``urllib.error.URLError`` (or a subclass) if the fetch
    fails.
    """
    # Context manager guarantees the HTTP connection is closed.
    with urllib.request.urlopen(url) as html_page:
        soup = BeautifulSoup(html_page, "html.parser")
    return [link.get("href")
            for link in soup.find_all("a", attrs={"href": _HTTP_LINK})]


print(getLinks("http://arstechnica.com"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment