Skip to content

Instantly share code, notes, and snippets.

@andresriancho
Created July 3, 2014 12:36
Show Gist options
  • Save andresriancho/5e8876d9b7fa71b1614e to your computer and use it in GitHub Desktop.
Save andresriancho/5e8876d9b7fa71b1614e to your computer and use it in GitHub Desktop.
Check for broken links in your Django site from a unit test!
import subprocess
import unittest
import re
import shlex
import os
import time
# Write-only handle on the null device, shared by the whole module: it is
# handed to child processes as stdout/stderr so their output is discarded.
FNULL = open(os.devnull, mode='w')
class TestNo404(unittest.TestCase):
    """Crawl a locally started Django development server for broken links.

    The single test runs the ``SETUP`` management commands, starts
    ``manage.py runserver`` on ``ADDRESS``, runs the ``linkchecker``
    command-line tool against that address and asserts on the totals parsed
    from the tool's summary line.
    """

    # Management commands executed, in order, before the server is started.
    SETUP = ['python manage.py syncdb --noinput',
             'python manage.py migrate --noinput']

    # host:port the development server binds to and linkchecker crawls.
    ADDRESS = '127.0.0.1:65123'

    # --noreload: with the auto-reloader enabled the site can be served from
    # a child of the process we spawn, and that child can outlive the
    # terminate() call made on its parent, keeping the port busy.
    RUNSERVER = 'python manage.py runserver --noreload %s' % ADDRESS

    # pip install LinkChecker==9.2
    LINK_CHECKER = 'linkchecker http://%s --ignore-url=__debug__'

    # Regex for linkchecker's summary line. Capture groups, in order:
    # links, URLs, warnings, errors.
    LINK_CHECKER_OUTPUT = "That's it. (.*?) links in (.*?) URLs checked. (.*?)"\
                          " warnings found. (.*?) errors? found."

    def test_no_404(self):
        """Fail if linkchecker reports any warning or error for the site."""
        self.setup_django_runserver()
        runserver_proc = self.start_django_runserver()

        try:
            stdout, links, urls, warnings, errors = self.run_linkchecker(self.ADDRESS)

            # Adjust your settings here
            self.assertGreater(links, 2000, stdout)
            self.assertGreater(urls, 30, stdout)
            self.assertEqual(warnings, 0, stdout)
            self.assertEqual(errors, 0, stdout)
        finally:
            # Only signal a server that is still running, then always reap
            # it so no zombie process is left behind after the test.
            if runserver_proc.poll() is None:
                runserver_proc.terminate()
            runserver_proc.wait()

    def setup_django_runserver(self):
        """Run every command in ``SETUP``, discarding its output.

        Raises:
            subprocess.CalledProcessError: if a command exits non-zero.
        """
        for cmd in self.SETUP:
            # Output goes to the null device rather than to a PIPE:
            # check_call() never reads from pipes, so a command that prints
            # more than the pipe buffer holds would block forever.
            subprocess.check_call(shlex.split(cmd),
                                  stdout=FNULL,
                                  stderr=subprocess.STDOUT)

    def start_django_runserver(self):
        """Start the development server and return its ``Popen`` object.

        The server's stdout and stderr are both discarded. The test fails
        immediately if the process has already exited after the start-up
        delay.
        """
        p = subprocess.Popen(shlex.split(self.RUNSERVER),
                             stdout=FNULL,
                             stderr=subprocess.STDOUT)

        # Let the daemon start
        time.sleep(2)

        # A server that died during start-up would otherwise only show up
        # later as a confusing linkchecker failure.
        if p.poll() is not None:
            self.fail('"%s" exited during start-up with status %s'
                      % (self.RUNSERVER, p.returncode))

        return p

    def run_linkchecker(self, address):
        """Run linkchecker against ``address`` and parse its summary line.

        Args:
            address: ``host:port`` string substituted into ``LINK_CHECKER``.
                It is interpolated into a shell command line, so pass only
                trusted values.

        Returns:
            A ``(stdout, links, urls, warnings, errors)`` tuple: the tool's
            standard output followed by the four integer totals.

        The test fails, with the full output as the message, when the
        summary line cannot be found.
        """
        p = subprocess.Popen(self.LINK_CHECKER % address, shell=True,
                             stdout=subprocess.PIPE,
                             stderr=FNULL)
        stdoutdata, _ = p.communicate()

        # On Python 3 the pipe yields bytes, which cannot be searched with
        # a str pattern; on Python 2 the output is already str.
        if not isinstance(stdoutdata, str):
            stdoutdata = stdoutdata.decode('utf-8', 'replace')

        m = re.search(self.LINK_CHECKER_OUTPUT, stdoutdata)
        if m is None:
            self.fail(stdoutdata)

        links, urls, warnings, errors = [int(m.group(i)) for i in range(1, 5)]
        return stdoutdata, links, urls, warnings, errors
@andresriancho
Copy link
Author

The `--ignore-url=__debug__` option makes linkchecker skip the Django Debug Toolbar links.

@andresriancho
Copy link
Author

PS: This unit test is only as effective as the linkchecker tool itself. In other words, if your site relies heavily on JavaScript and other client-side code, this won't work very well.

PS2: I'm not crawling past the login, so pages that require authentication are not checked.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment