Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save joshuadfranklin/2fb4b332c7ed88197aae5d4ccf813f1b to your computer and use it in GitHub Desktop.
Save joshuadfranklin/2fb4b332c7ed88197aae5d4ccf813f1b to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# http://stackoverflow.com/questions/51233/how-can-i-retrieve-the-page-title-of-a-webpage-using-python
import sys
import six
from six.moves import html_parser
from six.moves import urllib
from six.moves import http_cookiejar
from six.moves import socketserver
from BeautifulSoup import BeautifulSoup
# Some websites reject urllib's default agent string, so a browser-like
# User-agent header is sent instead.
USER_AGENT = 'Mozilla/5.0 (compatible; X 10.0; Commodore 64) Chrome/666 BeautifulSoup'


def normalize_url(urlstr):
    """Return *urlstr*, prefixed with ``http://`` when it lacks an HTTP scheme.

    Only a leading ``http://`` or ``https://`` (any letter case) counts as a
    scheme, so a bare host name that merely begins with "http" (for example
    ``httpbin.org``) still gets the prefix.
    """
    if urlstr.lower().startswith(('http://', 'https://')):
        return urlstr
    return 'http://' + urlstr


def fetch_title(urlstr):
    """Download *urlstr* and return the string of its ``<title>`` element.

    Returns None when the parsed page has no ``<title>`` tag; otherwise
    returns whatever ``soup.title.string`` holds.
    NOTE(review): BeautifulSoup may itself leave ``.string`` as None for some
    markup - confirm against the library documentation.

    Errors raised while opening or reading the URL propagate to the caller.
    """
    # The cookie jar is not used for anything itself; it is only there so the
    # opener is built with an HTTPCookieProcessor.
    cj = http_cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    opener.addheaders = [('User-agent', USER_AGENT)]
    response = opener.open(urlstr)
    try:
        page = response.read()
    finally:
        # Close explicitly so the connection is released even if read() fails.
        response.close()
    soup = BeautifulSoup(page)
    if soup.title is None:
        return None
    return soup.title.string


def main(argv=None):
    """Print the page title and the URL for the address given in *argv*.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the URL (the scheme is
    optional).  Returns the process exit status: 1 when the URL argument is
    missing (after printing a usage line), 0 otherwise.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print("Usage: title.py http://example.com")
        return 1
    urlstr = normalize_url(argv[1])
    title = fetch_title(urlstr)
    # Print an empty line rather than crashing or printing "None" when the
    # page has no usable title.
    print(title if title is not None else "")
    print(urlstr)
    # print("") emits a blank line on both Python 2 and Python 3; a bare
    # `print` is a no-op expression on Python 3.
    print("")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment