-
-
Save habedi/7ecbf0a7d89b259f8fa9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from grab import Grab, GrabError | |
from multiprocessing.dummy import Pool as ThreadPool | |
import time | |
def load_proxy_list(filepath):
    """Read a proxy list file and return its lines.

    Args:
        filepath: Path to a text file with one proxy per line
            (e.g. '128.2.198.188:3124').

    Returns:
        A list of the file's lines with line terminators removed.
        An empty file yields an empty list.
    """
    # The context manager closes the file even if read() raises.
    with open(filepath) as proxy_file:
        return proxy_file.read().splitlines()
def get_valid_proxy(proxy_list):
    """Lazily yield the proxies through which baidu.com can be fetched.

    A single Grab instance is reused and reconfigured for every candidate.
    Each candidate is announced on stdout before it is tried.

    Args:
        proxy_list: Iterable of proxy strings, e.g. '128.2.198.188:3124'.

    Yields:
        Every proxy for which the request finished without a GrabError,
        in input order.
    """
    grabber = Grab()
    for candidate in proxy_list:
        grabber.setup(proxy=candidate, proxy_type='http', connect_timeout=5, timeout=5)
        print("checking %s" % candidate)
        try:
            grabber.go('baidu.com')
        except GrabError:
            # A failed request only means this proxy is unusable; move on.
            continue
        yield candidate
def check_proxy(proxy):
    """Probe one HTTP proxy by requesting baidu.com through it.

    Args:
        proxy: Proxy string, e.g. '128.2.198.188:3124'.

    Returns:
        The same proxy value when the request completes without a
        GrabError, otherwise None.
    """
    grabber = Grab()
    grabber.setup(proxy=proxy, proxy_type='http', connect_timeout=4, timeout=4)
    try:
        grabber.go('baidu.com')
    except GrabError:
        # A failed request marks the proxy as unusable.
        return None
    return proxy
def save_list(valid_list):
    """Write the proxies to '<unix-timestamp>.txt' in the working directory.

    The file name is the current time in whole seconds since the epoch.
    Entries are separated by newlines, with no trailing newline.

    Args:
        valid_list: Iterable of proxy strings to persist.
    """
    # Whole seconds since the epoch, e.g. 1700000000 -> "1700000000.txt".
    filename = "%d.txt" % int(time.time())
    # The context manager closes the file even if joining or writing raises.
    with open(filename, "w") as out_file:
        out_file.write("\n".join(valid_list))
if __name__ == "__main__":
    # Script entry point: load candidates from proxylist.txt, check them in
    # parallel, and save the ones that worked to a timestamped file.
    proxy_list = load_proxy_list("proxylist.txt")

    # Each check waits on a network request, so run them on 8 worker threads.
    pool = ThreadPool(8)
    try:
        results = pool.map(check_proxy, proxy_list)
    finally:
        # Always shut the workers down, even if a check raised unexpectedly.
        pool.close()
        pool.join()

    # check_proxy returns None for proxies that failed; keep only the rest.
    valid_list = [proxy for proxy in results if proxy is not None]
    save_list(valid_list)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment