Skip to content

Instantly share code, notes, and snippets.

@adamhrv
Created September 25, 2015 01:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adamhrv/a1c5eb6034fb05ef9e32 to your computer and use it in GitHub Desktop.
Save adamhrv/a1c5eb6034fb05ef9e32 to your computer and use it in GitHub Desktop.
Crawl a line-delimited list of URLs (one per line, read from a text file) with OpenWPM
from automation import TaskManager
import time
import os
import sys
# Number of browser instances to launch for the crawl
NUM_BROWSERS = 3

# The list of sites that we wish to crawl is read from a text file that
# holds one URL per line, for example:
#     http://www.example.com
#     http://www.princeton.edu
#     https://citp.princeton.edu/
# Run as: python process-urls.py your-urls.txt
if len(sys.argv) < 2:
    # No URL file was given. Counting the arguments (rather than comparing
    # the last argument with this script's file name) also handles the script
    # being started through a path such as `python ./dir/process-urls.py`.
    sys.exit("usage: python %s your-urls.txt" % os.path.basename(sys.argv[0]))

# The last command-line argument names the URL file
with open(sys.argv[-1]) as f:
    # Strip surrounding whitespace (including the line terminator) from each
    # URL and skip blank lines, so only clean, non-empty URLs are crawled.
    sites = [line.strip() for line in f if line.strip()]

# A single formatted string prints the same text on Python 2 and Python 3
print("Processing %d sites" % len(sites))
# Fetch the default browser preferences, requesting NUM_BROWSERS of them
browser_params = TaskManager.load_default_params(NUM_BROWSERS)

# Keep Flash enabled in every browser
for browser_index in xrange(NUM_BROWSERS):
    browser_params[browser_index]['disable_flash'] = False

# Only the first browser is launched headless
browser_params[0]['headless'] = True

# The crawl output DB is saved to the current user's Desktop
db_loc = os.path.expanduser('~/Desktop/openwpm_demo.sqlite')
# Instantiates the measurement platform
# Launches two (non-headless) Firefox instances and one headless instance
# logging data with MITMProxy
# Commands time out by default after 60 seconds
manager = TaskManager.TaskManager(db_loc, browser_params, NUM_BROWSERS)

try:
    # Visits the sites with all browsers simultaneously, 5 seconds between
    # visits
    for site in sites:
        manager.get(site, index='**')  # ** = synchronized browsers
        time.sleep(5)
finally:
    # Shuts down the browsers and waits for the data to finish logging.
    # Running this in `finally` ensures the shutdown call is made even when a
    # visit raises or the crawl is interrupted (e.g. with Ctrl-C).
    manager.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment