Skip to content

Instantly share code, notes, and snippets.

@cbiggins
Last active October 11, 2016 01:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cbiggins/ee540f76d44db3f32ce2af0b3fcc5065 to your computer and use it in GitHub Desktop.
Save cbiggins/ee540f76d44db3f32ce2af0b3fcc5065 to your computer and use it in GitHub Desktop.
This gist is a Python Selenium test. It loads a page, scrapes its links, selects one at random (making sure it matches the right domain), and then checks that page for a JavaScript object (in this scenario the object is Nielsen analytics, but it could be anything).
#ScriptName : spiderTestNielssen.py
#---------------------
import random
import unittest

from selenium import webdriver
# Some of the selenium imports below are optional helpers and may be unused.
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
class SpiderNielsenTest(unittest.TestCase):
def setUp(self):
self.driver = webdriver.Firefox()
def test_for_nielsen_in_site(self):
driver = self.driver
visitedLinks = []
visitedLinksCount = 0;
visitedLinksLimit = 100;
testDomain = "http://www.example.com"
driver.get(testDomain)
links = driver.find_elements_by_tag_name('a')
assert "object" in driver.execute_script("return typeof NOLCMB;")
# The loop is large but will break when visitedLinksLimit is reached.
for i in range(0, 1000):
# Get a random link from the page.
curLink = random.choice(links)
curLinkHref = curLink.get_attribute('href')
# Discard empty links.
if not curLinkHref:
continue;
# Have we already seen this link?
if curLinkHref not in visitedLinks:
if testDomain in curLinkHref:
print curLinkHref
driver.get(curLinkHref)
try:
assert "object" in driver.execute_script("return typeof NOLCMB;")
except AssertionError:
# Object doesn't exist. Its possible it hasn't been initialised so ...
print "NO NIELSEN - FIRST RUN!"
# We'll wait 10 seconds and try again.
driver.implicitly_wait(10)
assert "object" in driver.execute_script("return typeof NOLCMB;")
# Test will fail here if object still doesn't exist after 10 seconds.
# Create a new list of links.
links = driver.find_elements_by_tag_name('a')
# Make sure we don't visit the same link twice. We'll log them in this list.
visitedLinks.append(curLinkHref)
visitedLinksCount += 1
else:
continue
else:
continue
# Break if we've hit our limit.
if visitedLinksCount is visitedLinksLimit:
break;
print visitedLinks
assert "object" in driver.execute_script("return typeof NOLCMB;")
def tearDown(self):
self.driver.close()
# Run the test case when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment