Skip to content

Instantly share code, notes, and snippets.

@ChiChou
Last active November 8, 2016 09:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save ChiChou/8ae9512fad468a042c84 to your computer and use it in GitHub Desktop.
Save ChiChou/8ae9512fad468a042c84 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
#coding:utf-8
'''
Author: @CodeColorist
Requirements: pip install lxml beautifulsoup4 tinycss
Usage:
    from hidemyass import proxies
    ...
    for proxy in proxies():
        # do something with the proxy dict
'''
import urllib2
import tinycss
from bs4 import BeautifulSoup
def _hides_element(decl):
    '''Return True if a tinycss declaration makes its element invisible.

    Only `display: none` and `visibility: hidden` are recognised. The value
    is compared case-insensitively because CSS keywords are case-insensitive.
    '''
    value = decl.value.as_css().strip().lower()
    return (decl.name == 'display' and value == 'none') or \
        (decl.name == 'visibility' and value == 'hidden')


def _remove_hidden_elements(table, css_parser):
    '''Detach from `table` every element that its CSS marks as invisible.

    Handles both embedded <style> sheets (which are themselves removed
    afterwards) and inline `style` attributes. `table` is modified in place.
    '''
    for style in table.find_all('style'):
        hidden_selectors = [
            rule.selector.as_css()
            for rule in css_parser.parse_stylesheet(style.text).rules
            # at-rules (@media, @import, ...) are not plain rule sets; skip them
            if getattr(rule, 'at_keyword', None) is None
            and any(_hides_element(decl) for decl in rule.declarations)
        ]
        # Only query when there is something to match: joining an empty list
        # would produce an empty selector string.
        if hidden_selectors:
            for element in table.select(','.join(hidden_selectors)):
                element.extract()
        style.extract()

    for element in table.select('[style]'):
        if element.name == 'tr':
            # rows are kept even when styled as hidden
            continue
        # parse_style_attr returns a (declarations, errors) pair; only the
        # declarations are relevant, parse errors are ignored.
        declarations, _errors = css_parser.parse_style_attr(element.get('style'))
        if any(_hides_element(decl) for decl in declarations):
            element.extract()


def proxies():
    '''Yield one dict per proxy listed on http://proxylist.hidemyass.com.

    The page is downloaded on first iteration. Elements hidden through CSS
    are removed from the listing table before any text is read, so that only
    visible content ends up in the results.

    Each yielded dict has the keys 'lastupdate', 'ip', 'port', 'country',
    'speed', 'conectiontime', 'type' and 'anon' (the spelling of
    'conectiontime' is kept as-is for existing callers). 'speed' and
    'conectiontime' hold the `value` attribute of the cell's <div>, or None
    when the cell has no <div>; every other key holds the stripped cell text.

    Raises:
        ValueError: if the page contains no <table id="listable">.
    '''
    # Local import keeps this function self-contained; closing() guarantees
    # the HTTP response is released even if read() fails.
    from contextlib import closing

    with closing(urllib2.urlopen('http://proxylist.hidemyass.com')) as response:
        html = response.read()

    table = BeautifulSoup(html, 'lxml').find('table', id='listable')
    if table is None:
        raise ValueError('proxy table (id="listable") not found in the page')

    rows = table.find_all('tr')[1:]  # skip first row
    _remove_hidden_elements(table, tinycss.make_parser())

    # Column order of the table, left to right.
    order = ('lastupdate', 'ip', 'port', 'country', 'speed', 'conectiontime', 'type', 'anon')
    # Columns whose data is read from the <div value="..."> inside the cell.
    div_value_keys = ('speed', 'conectiontime')

    for tr in rows:
        cells = tr.find_all('td')
        if len(cells) < len(order):
            # not a complete data row; nothing reliable to extract
            continue
        proxy = {}
        for key, cell in zip(order, cells):
            if key in div_value_keys:
                proxy[key] = cell.div.get('value') if cell.div is not None else None
            else:
                proxy[key] = cell.text.strip()
        yield proxy
if __name__ == '__main__':
    # Script entry point: fetch every proxy and dump the list to stdout.
    # The parenthesized single-argument form prints the same thing under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print(list(proxies()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment