Skip to content

Instantly share code, notes, and snippets.

@iamsk
Created August 8, 2015 09:10
Show Gist options
  • Save iamsk/13841ffbf946033bdeaf to your computer and use it in GitHub Desktop.
Save iamsk/13841ffbf946033bdeaf to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import logging
import os

import slugify
from selenium import webdriver
# Absolute path of the directory containing this script; screenshots and
# blogs.txt are resolved relative to it.
DIR = os.path.split(os.path.abspath(__file__))[0]
def capture_with_proxy(url, proxy_url, username, password):
    """Save a PNG screenshot of ``url`` rendered by PhantomJS via a proxy.

    The image is written into ``DIR`` and named after the slugified URL.
    Errors raised while loading the page or saving the screenshot are logged
    and swallowed, so a batch run can move on to the next URL.

    Args:
        url: Address of the page to capture. Surrounding whitespace (such as
            the newline left by ``readlines()``) is stripped; a blank value
            makes the call a no-op.
        proxy_url: Proxy address, optionally prefixed with ``http://``. When
            empty, no proxy options are passed to PhantomJS.
        username: Proxy user name.
        password: Proxy password. The ``--proxy-auth`` option is only passed
            when a proxy is set and at least one credential is non-empty.
    """
    url = url.strip()
    if not url:
        # Nothing to capture; avoid spawning a browser for a blank line.
        return

    # slugify is given text; only byte strings need decoding (on Python 2 a
    # plain ``str`` is bytes, which matches the original ``url.decode``).
    text_url = url.decode('utf-8') if isinstance(url, bytes) else url

    service_args = []
    if proxy_url:
        if proxy_url.startswith('http://'):
            proxy_url = proxy_url[len('http://'):]
        service_args.append('--proxy=%s' % proxy_url)
        if username or password:
            service_args.append('--proxy-auth=%s:%s' % (username, password))

    browser = webdriver.PhantomJS(service_args=service_args,
                                  service_log_path='capture.log')
    try:
        browser.set_window_size(400, 300)
        max_wait = 30  # seconds, applied to both page load and scripts
        browser.set_page_load_timeout(max_wait)
        browser.set_script_timeout(max_wait)
        file_path = os.path.join(DIR, slugify.slugify(text_url) + '.png')
        try:
            browser.get(url)
            browser.save_screenshot(file_path)
        except Exception:
            # Best effort: record the failure and let the caller continue.
            logging.exception('Failed to capture %s', url)
    finally:
        # quit() ends the whole driver session, whereas close() only closes
        # the current window and can leave the PhantomJS process running.
        browser.quit()
if __name__ == "__main__":
    # Proxy settings; fill these in before running.
    proxy_url = ''
    username = ''
    password = ''
    # Number of leading lines of blogs.txt to skip.
    # NOTE(review): presumably a resume offset left over from an earlier,
    # interrupted run -- confirm before relying on it.
    start_index = 99
    # blogs.txt holds one blog URL per line.
    with open(os.path.join(DIR, 'blogs.txt')) as f:
        blogs = f.readlines()
    # count is 1-based over the lines actually visited, as before.
    for count, line in enumerate(blogs[start_index:], 1):
        blog = line.strip()  # drop the trailing newline kept by readlines()
        if not blog:
            continue
        # Single-argument print() behaves the same on Python 2 and 3.
        print('%d %s' % (count, blog))
        capture_with_proxy(blog, proxy_url, username, password)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment