Skip to content

Instantly share code, notes, and snippets.

@robinfang
Created August 17, 2014 03:49
Show Gist options
  • Save robinfang/98039fa0f68b99412dcb to your computer and use it in GitHub Desktop.
Save robinfang/98039fa0f68b99412dcb to your computer and use it in GitHub Desktop.
检查代理可用否
from grab import Grab, GrabError
from multiprocessing.dummy import Pool as ThreadPool
import time
def load_proxy_list(filepath):
    """Read a proxy list from a text file, one entry per line.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list with one string per line of the file, with line terminators
        removed. Blank lines are kept as empty strings.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(filepath) as f:
        return f.read().splitlines()
def get_valid_proxy(proxy_list):
    """Lazily yield every proxy through which a test request succeeds.

    Args:
        proxy_list: Iterable of proxy addresses, e.g. '128.2.198.188:3124'.

    Yields:
        Each entry of proxy_list, in input order, for which requesting
        'baidu.com' through it did not raise GrabError.
    """
    grab = Grab()  # a single client, re-configured for each candidate
    for candidate in proxy_list:
        grab.setup(proxy=candidate, proxy_type='http', connect_timeout=5, timeout=5)
        print("checking %s" % candidate)
        try:
            grab.go('baidu.com')
        except GrabError:
            # The request failed through this proxy; move on to the next one.
            continue
        yield candidate
def check_proxy(proxy):
    """Test one proxy by requesting 'baidu.com' through it.

    Args:
        proxy: Proxy address string passed to Grab as an 'http' proxy.

    Returns:
        The proxy argument unchanged when the request completes without
        raising GrabError, otherwise None.
    """
    client = Grab()
    client.setup(proxy=proxy, proxy_type='http', connect_timeout=4, timeout=4)
    try:
        client.go('baidu.com')
    except GrabError:
        # A failed request marks the proxy as unusable.
        return None
    return proxy
def save_list(valid_list):
    """Write the given proxies to '<unix-seconds>.txt' in the working directory.

    The file name is the current Unix time truncated to whole seconds; the
    entries are written one per line with no trailing newline.

    Args:
        valid_list: Sequence of proxy address strings to store.

    Raises:
        IOError / OSError: If the file cannot be created or written.
    """
    # Truncate to whole seconds directly rather than parsing the float's repr.
    filename = "%d.txt" % int(time.time())
    # The context manager closes the handle even when write() raises.
    with open(filename, "w") as f:
        f.write("\n".join(valid_list))
if __name__ == "__main__":
    # Check every proxy listed in proxylist.txt on a pool of 8 threads.
    candidates = load_proxy_list("proxylist.txt")
    workers = ThreadPool(8)
    outcomes = workers.map(check_proxy, candidates)
    workers.close()
    workers.join()
    # check_proxy returns None for a failed proxy; store only the others.
    save_list([proxy for proxy in outcomes if proxy is not None])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment