Skip to content

Instantly share code, notes, and snippets.

@nooperpudd
Last active August 24, 2017 05:42
Show Gist options
  • Save nooperpudd/8748976 to your computer and use it in GitHub Desktop.
Save nooperpudd/8748976 to your computer and use it in GitHub Desktop.
Get web content with urllib2.
import urllib2, zlib
def GetContent(url, gzip=False, charset=None, headers=None):
    """Fetch the body of ``url`` and return it, decompressed if necessary.

    Args:
        url: Address to request; passed straight to ``urllib2.Request``.
        gzip: Currently unused. Kept so existing callers that pass it
            (positionally or by keyword) keep working.
        charset: Optional name of the encoding the body is in. When given,
            the body is decoded with it and re-encoded as UTF-8.
        headers: Optional dict of request headers.

    Returns:
        The response body as a byte string (UTF-8 encoded when ``charset``
        is given), or ``None`` when the response is not ``200 OK`` or when
        any exception occurs while fetching or decoding.

    Errors are printed and swallowed rather than raised, so callers must
    check for a ``None`` result.
    """
    response = None
    try:
        request = urllib2.Request(url, headers=headers or {})
        response = urllib2.urlopen(request)
        if response.msg != 'OK' or response.getcode() != 200:
            return None

        # Header values are case-insensitive, so normalise before comparing.
        encoding = response.headers.get('content-encoding', None) or ''
        encoding = encoding.strip().lower()

        body = response.read()
        if encoding == 'gzip':
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            body = zlib.decompress(body, 16 + zlib.MAX_WBITS)
        elif encoding == 'deflate':
            try:
                body = zlib.decompress(body)
            except zlib.error:
                # Some servers send a raw deflate stream without the zlib
                # wrapper; negative wbits decodes that form.
                body = zlib.decompress(body, -zlib.MAX_WBITS)

        if charset:
            return body.decode(charset).encode('utf8')
        return body
    except Exception as e:
        # Best-effort fetch: report the problem and signal failure with None.
        print(e)
        return None
    finally:
        # Runs on every path, including the early returns above, so the
        # connection is never leaked.
        if response is not None:
            response.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment