Skip to content

Instantly share code, notes, and snippets.

@unnamedd
Created August 12, 2009 23:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unnamedd/166849 to your computer and use it in GitHub Desktop.
Save unnamedd/166849 to your computer and use it in GitHub Desktop.
Get Source Code Through Proxy
# -*- coding: latin-1 -*-
# @Author: Thiago Holanda
# @Email: unnamedd@gmail.com
# @Date: 12/08/2009
import re
import urllib2
import sys
def getSourceCode(url, proxy="http://your_proxy:8080"):
    """Fetch ``url`` through an HTTP proxy and return the response body.

    Args:
        url: Address of the resource to download.
        proxy: URL of the proxy that ``http`` requests are routed through.
            Defaults to the placeholder this script originally hard-coded.

    Returns:
        Whatever ``read()`` yields for the response, or ``None`` when the
        request fails (the failure is reported on standard output).

    Side effects:
        Installs the proxy-aware opener as the module-wide default, so later
        ``urllib2.urlopen`` calls in this process use the same proxy.
    """
    proxy_handler = urllib2.ProxyHandler({"http": proxy})
    opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
    urllib2.install_opener(opener)

    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        # HTTPError must be caught before URLError (it is a subclass); only
        # HTTPError carries a status code and a readable error body.
        print("Error code:\n%s\n\n" % (e.code,))
        print("Source Code Retrieved: \n%s\n\n" % (e.read(),))
        return None
    except urllib2.URLError as e:
        # Connection-level failures (bad host, refused connection, missing
        # local file) have no status code or body, only a reason.
        print("Error reason:\n%s\n\n" % (e.reason,))
        return None

    try:
        return response.read()
    finally:
        # Release the underlying connection/file even if read() raises.
        response.close()
#TODO: Get Links from source code using Regex
#def doLinks():
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment