-
-
Save ChiChou/8ae9512fad468a042c84 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#coding:utf-8 | |
'''
Author: @CodeColorist
Requirements: pip install lxml beautifulsoup4 tinycss
Usage:
    from hidemyass import proxies
    ...
    for proxy in proxies():
        # do something
'''
import urllib2 | |
import tinycss | |
from bs4 import BeautifulSoup | |
# CSS property -> value that makes an element invisible.
_HIDING_VALUES = {'display': 'none', 'visibility': 'hidden'}

# Result keys, in the order their cells appear in each table row.  The
# spelling 'conectiontime' is part of the returned dict's interface.
_COLUMNS = ('lastupdate', 'ip', 'port', 'country', 'speed', 'conectiontime',
            'type', 'anon')

# Columns whose value is read from the `value` attribute of the cell's first
# <div> instead of from the cell text.
_ATTRIBUTE_COLUMNS = ('speed', 'conectiontime')


def _hides_element(decl):
    """Return True if the CSS declaration *decl* hides its element."""
    hiding_value = _HIDING_VALUES.get(decl.name)
    if hiding_value is None:
        return False
    # CSS keywords are case-insensitive, so normalise before comparing.
    return decl.value.as_css().strip().lower() == hiding_value


def _remove_hidden_elements(table, css_parser):
    """Detach from *table* everything hidden by <style> rules or inline styles."""
    for style in table.find_all('style'):
        stylesheet = css_parser.parse_stylesheet(style.text)
        hidden_selectors = [
            rule.selector.as_css()
            for rule in stylesheet.rules
            # Only plain rule sets are inspected; at-rules are skipped.
            if getattr(rule, 'at_keyword', None) is None
            and any(_hides_element(decl) for decl in rule.declarations)
        ]
        # Skip the query when nothing is hidden: an empty string is not a
        # meaningful CSS selector.
        if hidden_selectors:
            for element in table.select(','.join(hidden_selectors)):
                element.extract()
        style.extract()

    for element in table.select('[style]'):
        # Rows themselves are never dropped, only elements inside them.
        if element.name == 'tr':
            continue
        # parse_style_attr returns a (declarations, errors) pair; only the
        # declarations can be tested for visibility.
        declarations, _errors = css_parser.parse_style_attr(element.get('style'))
        if any(_hides_element(decl) for decl in declarations):
            element.extract()


def proxies():
    """Yield one dict per proxy listed on http://proxylist.hidemyass.com.

    The page is downloaded and the table with id ``listable`` is parsed.
    Elements hidden through CSS (``display: none`` / ``visibility: hidden``,
    set either in embedded ``<style>`` blocks or in ``style`` attributes) are
    removed first, so only the remaining text ends up in the results.

    Each yielded dict has the keys 'lastupdate', 'ip', 'port', 'country',
    'speed', 'conectiontime', 'type' and 'anon'.  'speed' and 'conectiontime'
    hold the ``value`` attribute of the cell's first ``<div>`` (None when the
    cell has no ``<div>``); every other entry is the stripped cell text.

    Nothing is yielded when the table is missing, and rows with fewer cells
    than expected are skipped.  Errors raised while fetching the page
    propagate to the caller.
    """
    response = urllib2.urlopen('http://proxylist.hidemyass.com')
    try:
        html = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table', id='listable')
    if table is None:
        return

    rows = table.find_all('tr')[1:]  # skip first row
    _remove_hidden_elements(table, tinycss.make_parser())

    for tr in rows:
        cells = tr.find_all('td')
        if len(cells) < len(_COLUMNS):
            continue
        record = {}
        for key, cell in zip(_COLUMNS, cells):
            if key in _ATTRIBUTE_COLUMNS:
                indicator = cell.div
                record[key] = indicator.get('value') if indicator is not None else None
            else:
                record[key] = cell.text.strip()
        yield record
if __name__ == '__main__':
    # Drain the generator first so the complete proxy list is printed at once.
    found = list(proxies())
    print(found)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment