Skip to content

Instantly share code, notes, and snippets.

@imankulov
Created January 24, 2012 04:18
Show Gist options
  • Save imankulov/1667784 to your computer and use it in GitHub Desktop.
Save imankulov/1667784 to your computer and use it in GitHub Desktop.
Simple script to show the diff between two HTML pages in an understandable format
#!/usr/bin/env python
""" Simple script to show diff between two HTML pages in understandable format """
import urllib2, tidy, sys, subprocess
# Keyword arguments forwarded to tidy.parseString() by read_website().
# NOTE(review): the names look like HTML Tidy configuration options spelled
# with underscores -- confirm against the tidy binding's documentation.
tidy_kwargs = {
    'output_xml': 1,
    'indent': 1,
    'indent_attributes': 1,
    'wrap': 80,
    'char_encoding': 'raw',
    'tidy_mark': 0,
    'force_output': 1,
    'quote_marks': 1,
    'word_2000': 1,
    'indent_spaces': 0,
    'wrap_attributes': 1,
}
def read_website(website, output_filename):
    """Download a page, run it through tidy, and save the result to a file.

    Args:
        website: Address without a scheme; ``http://`` is prepended to it.
        output_filename: Path of the file that receives the tidied markup.
            The file is opened in ``'w'`` mode, so existing content is
            overwritten.
    """
    url = 'http://%s' % website
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    clean_content = str(tidy.parseString(content, **tidy_kwargs))
    # ``with`` guarantees the file is closed even if the write raises.
    with open(output_filename, 'w') as f:
        f.write(clean_content)
def print_diff(file1, file2):
    """Show a unified diff of two files, colorized and paged.

    Runs ``diff -u | colordiff | less -R`` through bash and blocks until the
    pipeline exits.

    Args:
        file1: Path of the first ("before") file.
        file2: Path of the second ("after") file.
    """
    subprocess.call(
        [
            'bash', '-c',
            # The paths are handed to bash as positional parameters rather
            # than being pasted into the command text, so spaces and shell
            # metacharacters in a file name cannot break or alter the
            # command. ``--`` keeps a leading "-" from being read as an
            # option by diff.
            'diff -u -- "$1" "$2" | colordiff | less -R',
            'bash',  # becomes $0 inside the -c script
            file1,   # $1
            file2,   # $2
        ])
if __name__ == '__main__':
    # The site to compare comes from the command line.
    if len(sys.argv) != 2:
        sys.exit('Usage: %s <website>' % sys.argv[0])
    website = sys.argv[1]

    # Snapshot the page before the change.
    read_website(website, '/tmp/before.xhtml')
    # Placeholder: perform the change whose effect on the page you want to
    # inspect (deploy, toggle a setting, ...) here.
    read_website(website, '/tmp/after.xhtml')
    print_diff('/tmp/before.xhtml', '/tmp/after.xhtml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment