Skip to content

Instantly share code, notes, and snippets.

@zeroxia
Created May 14, 2016 14:21
Show Gist options
  • Save zeroxia/ae3a5d2bbd4f1fa03cfdab56a686a41e to your computer and use it in GitHub Desktop.
Save zeroxia/ae3a5d2bbd4f1fa03cfdab56a686a41e to your computer and use it in GitHub Desktop.
Python 2.7.6 (on Ubuntu 14.04) does not support SSL certificate verification through urllib2, whereas later Python 2.7.x releases (2.7.9 onwards) enable SSL certificate verification by default. Here are some snippets for properly handling these caveats.
#!/usr/bin/python
import socket
import httplib
import ssl
import urllib
import urllib2
# archaic python urllib2 documentation: 2.7.2
# https://python.readthedocs.io/en/v2.7.2/library/urllib2.html
# Note: since python 2.7.9, urllib2 enforces ssl certificate verification by default
# download ca certificates from:
# https://curl.haxx.se/ca/cacert.pem
# courtesy of: http://thejosephturner.com/blog/post/https-certificate-verification-in-python-with-urllib2/
class VerifiedHTTPSConnection(httplib.HTTPSConnection):
    """HTTPS connection that validates the server certificate.

    The stock ``connect`` is replaced so that the TLS handshake requires a
    certificate chaining up to one of the roots in ``ca_certs``.  When the
    running interpreter provides ``ssl.match_hostname`` the certificate is
    additionally checked against the host being contacted; on interpreters
    without that function only the certificate chain is validated.
    """

    # PEM bundle of trusted root certificates.  A relative path is resolved
    # against the current working directory; override on a subclass or an
    # instance to point somewhere else.
    ca_certs = "cacert.pem"

    def connect(self):
        """Open the TCP connection and wrap it in a verified TLS session.

        Raises:
            socket.error: the TCP connection could not be established.
            ssl.SSLError: the handshake or chain validation failed.
            ssl.CertificateError: the certificate does not match the host
                (only when ``ssl.match_hostname`` is available).
        """
        # Pass source_address through, as the base class connect() does.
        sock = socket.create_connection(
            (self.host, self.port), self.timeout, self.source_address)
        if self._tunnel_host:
            # Going through a proxy: establish the CONNECT tunnel on the
            # plain socket first; the certificate then belongs to the
            # tunnelled host, not to the proxy.
            self.sock = sock
            self._tunnel()
            server_hostname = self._tunnel_host
        else:
            server_hostname = self.host

        # Wrap the socket, requiring a certificate signed by a trusted root.
        try:
            tls_sock = ssl.wrap_socket(sock,
                                       self.key_file,
                                       self.cert_file,
                                       cert_reqs=ssl.CERT_REQUIRED,
                                       ca_certs=self.ca_certs)
        except Exception:
            # Do not leak the raw socket when the handshake fails.
            self.sock = None
            sock.close()
            raise

        # CERT_REQUIRED only proves the chain is trusted; without a hostname
        # check any valid certificate would be accepted for any host.
        match_hostname = getattr(ssl, "match_hostname", None)
        if match_hostname is not None:
            try:
                match_hostname(tls_sock.getpeercert(), server_hostname)
            except Exception:
                self.sock = None
                tls_sock.close()
                raise

        self.sock = tls_sock
# wraps https connections with ssl certificate verification
class VerifiedHTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 HTTPS handler that opens requests with a custom connection class.

    ``connection_class`` is handed to ``do_open`` for every https request, so
    whatever verification that class performs applies to each connection.
    """

    def __init__(self, connection_class=VerifiedHTTPSConnection):
        urllib2.HTTPSHandler.__init__(self)
        # Remember which connection class https_open should use.
        self.specialized_conn_class = connection_class

    def https_open(self, req):
        """Announce the target URL, then open it via the stored connection class."""
        target = req.get_full_url()
        print("custom https_open: %s" % target)
        return self.do_open(self.specialized_conn_class, req)
def inspect(obj, attr):
    """Print ``ClassName.attr`` followed by the attribute's value.

    A callable attribute is invoked with no arguments and its return value
    is printed; any other attribute is printed as it is.
    """
    # getattr raises AttributeError by itself when attr is missing, so no
    # explicit hasattr check is made here.
    member = getattr(obj, attr)
    label = ".".join([obj.__class__.__name__, attr])
    value = member() if callable(member) else member
    print("{:20s}: {}".format(label, value))
def inspect_response(response):
    """Print a fixed set of attributes of a urllib2 response via ``inspect``."""
    # 'fp' and 'headers' are intentionally not dumped (they were disabled in
    # the original listing), and neither is dir(response).
    attributes = (
        'getcode',
        'geturl',
        'code',
        'url',
        'msg',
        'fileno',
        'info',
    )
    for attribute in attributes:
        inspect(response, attribute)
def test_dl_http():
    """Send a form over plain HTTP and print the response's metadata.

    Because a data payload is supplied, urllib2 issues the request as a POST.
    The response body is not read; only the attributes reported by
    ``inspect_response`` are printed.
    """
    url = 'http://www.163.com/cgi-bin/register.cgi'
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
    values = {
        'name': 'Michael Foord',
        'location': 'Northampton',
        'language': 'Python',
    }
    headers = {'User-Agent': user_agent}
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data, headers)
    response = urllib2.urlopen(req)
    try:
        # To fetch the body instead: the_page = response.read()
        inspect_response(response)
    finally:
        # Release the underlying socket even if inspection fails.
        response.close()
def test_dl_https():
    """Fetch an HTTPS URL with the default urllib2 opener and print its metadata.

    Whether the certificate is verified here depends entirely on the running
    interpreter's urllib2 defaults; no custom handler is installed.
    """
    # Experiments with an explicit SSL context, left disabled:
    #ctx = ssl.create_default_context()
    #ctx = ssl.SSLContext()
    #ctx.check_hostname = False
    #ctx.verify_mode = ssl.CERT_NONE

    # Alternative target: "https://zeroxia.org/"
    url = "https://kyfw.12306.cn/otn"
    req = urllib2.Request(url)
    resp = urllib2.urlopen(req, timeout=5)
    try:
        # To fetch the body instead: data = resp.read(); print(data)
        inspect_response(resp)
    finally:
        # Release the underlying socket even if inspection fails.
        resp.close()
def test_dl_https_2():
    """Fetch an HTTPS URL through ``VerifiedHTTPSHandler`` and print its metadata.

    Note that the opener built here is installed globally with
    ``urllib2.install_opener``, so later ``urllib2.urlopen`` calls in the
    same process also go through the custom handler.
    """
    # Alternative targets: "https://zeroxia.org/", "https://kyfw.12306.cn/otn"
    url = "https://www.baidu.com/"
    https_handler = VerifiedHTTPSHandler()
    url_opener = urllib2.build_opener(https_handler)
    urllib2.install_opener(url_opener)
    # Equivalent without relying on the installed opener:
    #   response = url_opener.open(url)
    response = urllib2.urlopen(url, timeout=8)
    try:
        inspect_response(response)
    finally:
        # Close the response even when inspection raises.
        response.close()
if __name__ == '__main__':
    # Only the custom-handler scenario runs by default.  The other two are
    # kept for manual experimentation:
    #   test_dl_http()
    #   test_dl_https()
    test_dl_https_2()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment