Skip to content

Instantly share code, notes, and snippets.

@starenka
Created May 3, 2014 20:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save starenka/86f805252b221795f9d9 to your computer and use it in GitHub Desktop.
Save starenka/86f805252b221795f9d9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding=utf-8
import argparse
from selenium import webdriver
# Maps each value accepted by the --browser option to the Selenium WebDriver
# class that is instantiated for it.
# NOTE(review): some of these driver classes may be missing from newer
# Selenium releases -- confirm against the installed version.
BROWSERS = {
    'phantom': webdriver.PhantomJS,
    'opera': webdriver.Opera,
    'firefox': webdriver.Firefox,
    'chrome': webdriver.Chrome,
}
# JavaScript snippet handed to browser.execute_script() after the page loads.
# For every element matching ".fileReset" whose text does not match the
# login/registration pattern, it builds 'http://uloz.to' + the decrypted
# value looked up by the element's "data-icon" attribute, collects each URL
# once, and returns the resulting array.
# Assumes the page itself provides `$` (presumably jQuery) and the `ad` / `kn`
# objects used for decryption -- TODO confirm against the live page.
# NOTE(review): in the regex, `^` binds only to the first alternative and `$`
# only to the second; confirm whether `^(...|...)$` was intended.
# NOTE(review): `links` and `url` are assigned without `var`, so they are
# presumably created as globals in the page -- confirm this is acceptable.
PAYLOAD = '''
links = [];
$(".fileReset").each(function(i, el){
if(!/^přihlásit|zaregistruj se$/.test($(el).text())){
url = 'http://uloz.to' + ad.decrypt(kn[$(el).attr('data-icon')]);
if(links.indexOf(url) == -1) links.push(url);
}
});
return links;
'''
# Command-line interface: one or more search-page URLs plus an optional
# browser backend (defaults to PhantomJS).
parser = argparse.ArgumentParser(description='Scrapes links from Uloz.to search page')
parser.add_argument('url', nargs='+')
parser.add_argument('-b', '--browser', dest='browser', choices=sorted(BROWSERS),
                    default='phantom', help='browser backend to use')
args = parser.parse_args()

# Start the selected browser; the try/finally guarantees it is shut down even
# when loading a page or running the script raises.
browser = BROWSERS[args.browser]()
try:
    # nargs='+' makes args.url a list, so every URL is loaded on its own
    # instead of passing the whole list to browser.get().
    for url in args.url:
        browser.get(url)
        # Guard against a null result so the loop below never iterates None.
        links = browser.execute_script(PAYLOAD) or []
        for one in links:
            # Function-call form works on both Python 2 and Python 3.
            print(one)
finally:
    # quit() closes all windows and also ends the driver process, which a
    # bare close() can leave running.
    browser.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment