Skip to content

Instantly share code, notes, and snippets.

@littlepea
Created July 29, 2013 11:28
Show Gist options
  • Save littlepea/6103714 to your computer and use it in GitHub Desktop.
Save littlepea/6103714 to your computer and use it in GitHub Desktop.
Get a random proxy from a cached remote list
import random
import urllib2
import os
from django.conf import settings
from django.core.cache import cache
""" JS to get proxies from http://hidemyass.com/proxy-list/
arr = []
ports = []
tr = $('#listtable tr[class!="altshade"]')
tr.each(function(i, item) {
//ip.push(item)
ip = []
$(item).find('td:eq(1) span:visible').each(function(j, s) {
t = parseInt($(s).text())
t1 = parseInt($(s).children(':first').text())
if(t < 256) {
segment = t
}
else {
if(t1 < 256) segment = t1
}
if(ip.indexOf(segment) < 0) ip.push(segment)
});
$(item).find('td:eq(2)').each(function(j, s) {
p = parseInt($(s).text())
})
res = ip.join('.')+':'+p
if(ip.length == 4) arr.push(res)
})
for(i=0; i<arr.length; i++) {
console.log(arr[i])
}
"""
# Static, last-resort proxies in "host:port" form. get_proxy() falls back to
# this list when neither the remote list nor the bundled file can be read.
PROXIES = [
    '208.94.244.20:3128',
    '37.59.167.184:3128',
    '122.141.243.215:80',
    # Was '78.1s11.247.217:1080': the stray "s" made the address invalid.
    '78.111.247.217:1080',
    '187.60.96.7:3128',
    '61.141.21.34:8080',
    '218.28.111.46:8080',
    '190.66.17.53:3128',
    '46.20.4.26:8080',
]
# Remote plain-text proxy lists. get_proxy() downloads one of these, chosen at
# random, whenever the cached list is missing or empty. The commented-out URLs
# are disabled alternative sources.
# NOTE(review): get_proxy() discards the first six and the last
# whitespace-separated tokens of the download -- presumably tailored to the
# layout of the first file; confirm before enabling other sources.
PROXY_LISTS = [
'http://dl.dropbox.com/u/943118/_reliable_list.txt',
# 'http://multiproxy.org/txt_anon/proxy.txt',
# 'http://www.proxynova.com/get_proxies.php?proxy_country=0&proxy_type=1&btn_submit=Download+all+Proxies',
]
def _fetch_remote_proxies():
    """Download one randomly chosen list from PROXY_LISTS and return its entries."""
    url = random.choice(PROXY_LISTS)
    response = urllib2.urlopen(url, timeout=5)
    try:
        tokens = response.read().split()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    # The first six tokens and the last one are skipped, as before --
    # presumably header/footer text of the list file (TODO confirm).
    return tokens[6:-1]


def _read_local_proxies():
    """Return the entries of the ``proxies/_reliable_list.txt`` file next to this module."""
    proxy_file = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        'proxies',
        '_reliable_list.txt',
    )
    with open(proxy_file, 'r') as handle:
        return handle.read().split()


def _load_proxies():
    """Return a non-cached proxy list: remote first, then local file, then PROXIES."""
    try:
        proxies = _fetch_remote_proxies()
    except Exception:
        # Best effort: any network or parsing problem means "try the next
        # source". Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        proxies = []
    if not proxies:
        try:
            proxies = _read_local_proxies()
        except (IOError, OSError):
            proxies = []
    # Hand out a copy so callers can never mutate the module-level constant.
    return proxies or list(PROXIES)


def get_proxy(exclude=None):
    """Return a randomly chosen proxy as a ``{'http': 'host:port'}`` dict.

    The proxy list is read from the ``'proxies_list'`` cache key. When that
    entry is missing or empty, the list is rebuilt from the first source that
    yields entries (a remote list from ``PROXY_LISTS``, the bundled
    ``proxies/_reliable_list.txt`` file, the static ``PROXIES`` list) and
    stored back in the cache.

    Args:
        exclude: Optional dict in the same shape as the return value. Its
            ``'http'`` entry, if present in the list, is removed from the
            cached list before a proxy is picked (e.g. a proxy that just
            failed).

    Returns:
        A dict with a single ``'http'`` key holding a ``host:port`` string.

    Raises:
        ValueError: If no proxy other than the excluded one is available.
    """
    proxies = cache.get('proxies_list')
    if not proxies:
        proxies = _load_proxies()
        cache.set('proxies_list', proxies, settings.DEFAULT_CACHE_DURATION)

    excluded = exclude.get('http') if exclude else None
    if excluded is not None and excluded in proxies:
        # Build a new list instead of calling .remove(), so the object from
        # the cache (or PROXIES) is never mutated in place.
        proxies = [proxy for proxy in proxies if proxy != excluded]
        # NOTE(review): this write uses DEFAULT_DATA_CACHE_DURATION while the
        # one above uses DEFAULT_CACHE_DURATION -- kept as in the original;
        # confirm both settings exist and the difference is intended.
        cache.set('proxies_list', proxies, settings.DEFAULT_DATA_CACHE_DURATION)

    if not proxies:
        # Pool exhausted: the empty cached value forces a reload on the next
        # call; for this call fall back to the static list.
        proxies = [proxy for proxy in PROXIES if proxy != excluded]
    if not proxies:
        raise ValueError('no proxies available')

    return {
        'http': random.choice(proxies)
    }
#if __name__ == '__main__':
# print get_proxy()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment