Created
May 14, 2016 14:21
-
-
Save zeroxia/ae3a5d2bbd4f1fa03cfdab56a686a41e to your computer and use it in GitHub Desktop.
Python 2.7.6 (on Ubuntu 14.04) does not support SSL certificate verification using urllib2, while from Python 2.7.9 onward SSL certificate verification is enabled by default. Here are some snippets for properly handling these caveats.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import socket | |
import httplib | |
import ssl | |
import urllib | |
import urllib2 | |
# archaic python urllib2 documentation: 2.7.2 | |
# https://python.readthedocs.io/en/v2.7.2/library/urllib2.html | |
# Note: since python 2.7.9, urllib2 enforces ssl certificate verification by default | |
# download ca certificates from: | |
# https://curl.haxx.se/ca/cacert.pem | |
# courtesy of: http://thejosephturner.com/blog/post/https-certificate-verification-in-python-with-urllib2/ | |
class VerifiedHTTPSConnection(httplib.HTTPSConnection):
    """HTTPS connection that verifies the server certificate.

    The stock ``connect()`` is replaced so that the socket is wrapped with
    ``cert_reqs=ssl.CERT_REQUIRED`` against the root certificates found in
    ``ca_certs``.
    """

    # PEM bundle of trusted root certificates. A relative path is resolved
    # against the current working directory; override on a subclass or an
    # instance to point somewhere else.
    ca_certs = "cacert.pem"

    def connect(self):
        """Open the TCP connection and wrap it in a verified SSL socket.

        The certificate chain is always validated against ``ca_certs``.
        When the running interpreter provides ``ssl.match_hostname`` the
        certificate is additionally checked against the host being
        contacted; on interpreters without it only the chain is verified.

        Raises:
            socket.error / ssl.SSLError: connecting or the handshake failed.
            ssl.CertificateError: the certificate does not match the host
                (only where ``ssl.match_hostname`` exists).
        """
        sock = socket.create_connection((self.host, self.port),
                                        self.timeout, self.source_address)
        try:
            if self._tunnel_host:
                # Going through a proxy: the CONNECT request is sent over the
                # plain socket, and the certificate we receive afterwards
                # belongs to the tunnelled host, not to the proxy.
                self.sock = sock
                self._tunnel()
                server_hostname = self._tunnel_host
            else:
                server_hostname = self.host
            # wrap the socket using verification with the root certs
            sock = ssl.wrap_socket(sock,
                                   self.key_file,
                                   self.cert_file,
                                   cert_reqs=ssl.CERT_REQUIRED,
                                   ca_certs=self.ca_certs)
            # CERT_REQUIRED only proves the chain is trusted; without a
            # hostname check any valid certificate would be accepted.
            match_hostname = getattr(ssl, "match_hostname", None)
            if match_hostname is not None:
                match_hostname(sock.getpeercert(), server_hostname)
        except Exception:
            # Do not leak the descriptor when tunnelling, the handshake or
            # the hostname check fails.
            sock.close()
            self.sock = None
            raise
        self.sock = sock
# wraps https connections with ssl certificate verification | |
class VerifiedHTTPSHandler(urllib2.HTTPSHandler):
    """urllib2 HTTPS handler that opens requests with a custom connection class."""

    def __init__(self, connection_class=VerifiedHTTPSConnection):
        """Remember which connection class ``https_open`` should use."""
        urllib2.HTTPSHandler.__init__(self)
        self.specialized_conn_class = connection_class

    def https_open(self, req):
        """Log the requested URL, then open it via the stored connection class."""
        target = req.get_full_url()
        print("custom https_open: %s" % target)
        conn_class = self.specialized_conn_class
        return self.do_open(conn_class, req)
def inspect(obj, attr):
    """Print ``ClassName.attr`` followed by the attribute's value.

    A callable attribute is invoked with no arguments and its result is
    printed; any other attribute is printed as-is. A missing attribute lets
    the ``AttributeError`` from ``getattr`` propagate.
    """
    member = getattr(obj, attr)
    label = ".".join((obj.__class__.__name__, attr))
    value = member() if callable(member) else member
    print("{:20s}: {}".format(label, value))
def inspect_response(response):
    """Print a fixed set of attributes of a urllib2 response via ``inspect``."""
    # 'fp' and 'headers' are deliberately left out of the report.
    reported = ('getcode', 'geturl', 'code', 'url', 'msg', 'fileno', 'info')
    for attr_name in reported:
        inspect(response, attr_name)
def test_dl_http():
    """Send url-encoded form data to a plain-HTTP URL and print the response details.

    Because ``data`` is supplied, urllib2 issues the request as a POST. The
    response body is not read; only its metadata is printed.
    """
    url = 'http://www.163.com/cgi-bin/register.cgi'
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
    values = {'name': 'Michael Foord',
              'location': 'Northampton',
              'language': 'Python'}
    headers = {'User-Agent': user_agent}

    data = urllib.urlencode(values)
    req = urllib2.Request(url, data, headers)
    response = urllib2.urlopen(req)
    try:
        inspect_response(response)
    finally:
        # Release the connection even if inspection raises.
        response.close()
def test_dl_https():
    """Fetch an HTTPS URL with the default opener and print the response details.

    Whether the certificate is verified here depends on the interpreter:
    urllib2 enforces verification by default only since Python 2.7.9.
    """
    # To disable verification on 2.7.9+, pass a context to urlopen, e.g.:
    #ctx = ssl.create_default_context()
    #ctx.check_hostname = False
    #ctx.verify_mode = ssl.CERT_NONE

    # Alternative target: "https://zeroxia.org/"
    url = "https://kyfw.12306.cn/otn"

    req = urllib2.Request(url)
    resp = urllib2.urlopen(req, timeout=5)
    try:
        inspect_response(resp)
    finally:
        # Release the connection even if inspection raises.
        resp.close()
def test_dl_https_2():
    """Fetch an HTTPS URL through ``VerifiedHTTPSHandler`` and print the response details.

    Note that ``urllib2.install_opener`` replaces the process-wide default
    opener, so later ``urllib2.urlopen`` calls also use the verified handler.
    """
    # Alternative targets: "https://zeroxia.org/", "https://kyfw.12306.cn/otn"
    url = "https://www.baidu.com/"

    https_handler = VerifiedHTTPSHandler()
    url_opener = urllib2.build_opener(https_handler)
    urllib2.install_opener(url_opener)

    response = urllib2.urlopen(url, timeout=8)
    try:
        inspect_response(response)
    finally:
        # Close even when inspection raises, so the socket is not leaked.
        response.close()
if __name__ == "__main__":
    # Run only the demo that uses the certificate-verifying opener; the other
    # two demos are kept here, disabled, for manual experimentation.
    test_dl_https_2()
    #test_dl_http()
    #test_dl_https()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment