Skip to content

Instantly share code, notes, and snippets.

@phongjalvn
Forked from azeroz/socks24.py
Created November 24, 2012 19:00
Show Gist options
  • Save phongjalvn/4140986 to your computer and use it in GitHub Desktop.
Save phongjalvn/4140986 to your computer and use it in GitHub Desktop.
Scrape socks24
#!/usr/bin/env python
import sys, re
import argparse
from datetime import datetime
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
def main(url):
    """Fetch *url* and print the proxy entries listed after "Cleaned:".

    The page is downloaded and parsed with BeautifulSoup.  The first text
    node matching ``Cleaned:`` is located and the element returned by its
    ``findNext()`` is treated as the proxy listing.  A "Last checked" UTC
    timestamp is printed first, followed by one line per listing entry.

    Args:
        url: Address of the page to scrape.

    Raises:
        ValueError: If the page contains no "Cleaned:" marker, or no
            element follows it.
    """
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(html)
    marker = soup.find(text=re.compile("Cleaned:"))
    listing = marker.findNext() if marker is not None else None
    if listing is None:
        # Fail with a readable message instead of an AttributeError/TypeError
        # on None when the page layout is not what we expect.
        raise ValueError('No "Cleaned:" proxy listing found at %s' % url)

    # A single child of the listing element may span several lines, so join
    # the children and split again to end up with one entry per line.
    proxy_lines = u'\n'.join(map(unicode, listing)).split(u'\n')

    # Single-argument print(...) behaves the same as the print statement.
    print("Last checked: %s UTC " % datetime.utcnow())
    for entry in proxy_lines:
        print(entry)
    # Everything is already in memory, so one flush at the end is enough.
    sys.stdout.flush()
if __name__ == "__main__":
    # Script entry point: read the optional --url flag and scrape that page.
    cli = argparse.ArgumentParser(
        description='Get recent cleaned '
                    'proxies from sites.')
    cli.add_argument(
        '--url',
        type=str,
        default='http://www.socks24.org',
        help='Default - http://www.socks24.org',
    )
    options = cli.parse_args()
    # NOTE: the original author left a disabled line here that would have
    # reopened stdout unbuffered via os.fdopen(..., 'w', 0); it stays off.
    main(options.url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment