@wickman
Created June 11, 2014 16:14
requests patch to twitter.common.http
diff --git a/src/python/twitter/common/python/http/http.py b/src/python/twitter/common/python/http/http.py
index 51d0543..db5ab05 100644
--- a/src/python/twitter/common/python/http/http.py
+++ b/src/python/twitter/common/python/http/http.py
@@ -6,7 +6,7 @@ import struct
 import time

 from ..common import safe_delete, safe_mkdir, safe_mkdtemp
-from ..compatibility import PY2, PY3
+from ..compatibility import PY2, PY3, StringIO
 from .tracer import TRACER

 if PY3:
@@ -25,6 +25,13 @@ else:
   import urlparse


+try:
+  import requests
+  HAS_REQUESTS = True
+except ImportError:
+  HAS_REQUESTS = False
+
+
 class Timeout(Exception):
   pass
@@ -59,6 +66,34 @@ def deadline(fn, *args, **kw):
     raise Timeout


+# TODO(wickman) Extract md5/sha1 fragments and verify.
+# TODO(wickman) Establish a chain of trust rooted at pypi and anything explicitly called
+# out in find-links a la pip.
+
+
+def urllib_open(url, verify=False):
+  if verify:
+    raise FetchError('urlopen does not support SSL cert verification.')
+  try:
+    return urllib_request.urlopen(url)
+  except (urllib_error.URLError, HTTPException) as exc:
+    raise FetchError(exc)
+
+
+def requests_open(url, verify=False):
+  try:
+    resp = requests.get(url, verify=verify)
+  except requests.exceptions.RequestException as exc:
+    raise FetchError(exc)
+  return addinfourl(StringIO(resp.content), resp.headers, url, code=resp.status_code)
+
+
+if HAS_REQUESTS:
+  global_opener = requests_open
+else:
+  global_opener = urllib_open
+
+
 class Web(object):
   NS_TIMEOUT_SECS = 5.0
   CONN_TIMEOUT = 1.0
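The requests-backed opener wraps the response in an addinfourl so existing callers keep the urllib file-like interface (read/info/getcode). A minimal sketch of that wrapping outside the patch, in Python 3 spelling, with io.BytesIO standing in for the compatibility StringIO and example.com as a placeholder URL:

import io
import requests
from urllib.response import addinfourl

resp = requests.get('https://example.com/', verify=True)
fp = addinfourl(io.BytesIO(resp.content), resp.headers, resp.url,
                code=resp.status_code)

assert fp.getcode() == resp.status_code  # same accessor urllib callers expect
headers = fp.info()                      # response headers
body = fp.read()                         # body via the file-like interface
fp.close()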
@@ -112,7 +147,7 @@ class Web(object):
       return 'file://' + os.path.realpath(url)
     return url

-  def open(self, url, conn_timeout=None, **kw):
+  def open(self, url, conn_timeout=None, verify=True):
     """
     Wrapper in front of urlopen that more gracefully handles odd network environments.
     """
@@ -120,10 +155,7 @@ class Web(object):
     with TRACER.timed('Fetching %s' % url, V=1):
       if not self.reachable(url, conn_timeout=conn_timeout):
         raise FetchError('Could not reach %s within deadline.' % url)
-      try:
-        return urllib_request.urlopen(url, **kw)
-      except (urllib_error.URLError, HTTPException) as exc:
-        raise FetchError(exc)
+      return global_opener(url, verify=verify)


 class CachedWeb(object):
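With this hunk applied, Web.open verifies SSL certificates by default and the flag can be relaxed per call. Hypothetical usage (Web is assumed here to construct with no required arguments, and the URLs are placeholders):

web = Web()

# Verified HTTPS fetch when requests is installed. Under the urllib
# fallback this same call raises FetchError, since urlopen cannot
# verify certificates.
fp = web.open('https://pypi.python.org/simple/')

# Opting out of verification works under either opener.
fp = web.open('https://internal.example.com/', verify=False)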
@@ -168,16 +200,16 @@ class CachedWeb(object):
       return False
     return age > ttl

-  def really_open(self, url, conn_timeout=None):
+  def really_open(self, url, conn_timeout=None, **kw):
     try:
-      return self._opener.open(url, conn_timeout=conn_timeout)
+      return self._opener.open(url, conn_timeout=conn_timeout, **kw)
     except urllib_error.HTTPError as fp:
       # HTTPError is a valid addinfourl -- use this instead of raising
       return fp

-  def encode_url(self, url, conn_timeout=None):
+  def encode_url(self, url, **kw):
     target, target_tmp, headers, headers_tmp = self.translate_all(url)
-    with contextlib.closing(self.really_open(url, conn_timeout=conn_timeout)) as http_fp:
+    with contextlib.closing(self.really_open(url, **kw)) as http_fp:
       # File urls won't have a response code, they'll either open or raise.
       if http_fp.getcode() and http_fp.getcode() != 200:
         raise urllib_error.URLError('Non-200 response code from %s' % url)
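The CachedWeb changes are all one pattern: the named conn_timeout parameter becomes a trailing **kw that each layer forwards untouched, so new opener options like verify reach Web.open without every intermediate signature naming them. The shape of that threading, reduced to a sketch with hypothetical stand-in functions:

def open_url(url, ttl=None, **kw):     # stands in for CachedWeb.open
  return cache(url, **kw)              # ttl is consumed here; the rest flows on

def cache(url, **kw):                  # intermediate layers only forward
  return encode_url(url, **kw)

def encode_url(url, **kw):
  return really_open(url, **kw)

def really_open(url, conn_timeout=None, verify=True):
  # the bottom layer names the options it actually consumes
  return (url, conn_timeout, verify)

print(open_url('https://example.com/', ttl=3600, verify=False))
# -> ('https://example.com/', None, False)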
@@ -202,21 +234,21 @@ class CachedWeb(object):
     for path in self.translate_all(url):
       safe_delete(path)

-  def cache(self, url, conn_timeout=None):
+  def cache(self, url, **kw):
     """cache the contents of a url."""
     try:
-      self.encode_url(url, conn_timeout=conn_timeout)
+      self.encode_url(url, **kw)
     except urllib_error.URLError:
       self.clear_url(url)
       raise

-  def open(self, url, ttl=None, conn_timeout=None):
+  def open(self, url, ttl=None, **kw):
     """Return a file-like object with the content of the url."""
     expired = self.expired(url, ttl=ttl)
     with TRACER.timed('Opening %s' % ('(cached)' if not expired else '(uncached)'), V=1):
       if expired:
         try:
-          self.cache(url, conn_timeout=conn_timeout)
+          self.cache(url, **kw)
         except (urllib_error.URLError, HTTPException) as exc:
           if not self._failsoft or url not in self:
             raise FetchError(exc)
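End to end, a hypothetical session with the patched class (CachedWeb's constructor arguments here, cache and failsoft, are assumed for illustration, since its __init__ is outside this diff):

import contextlib

cached = CachedWeb(cache='/tmp/http_cache', failsoft=True)

# The first call fetches over requests (or urllib) and writes the cache;
# within the ttl, later calls are served from disk.
with contextlib.closing(cached.open('https://pypi.python.org/simple/',
                                    ttl=3600, verify=True)) as fp:
  listing = fp.read()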