Skip to content

Instantly share code, notes, and snippets.

@roadsideseb
Created February 13, 2014 06:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save roadsideseb/8970562 to your computer and use it in GitHub Desktop.
Save roadsideseb/8970562 to your computer and use it in GitHub Desktop.
A little script to pull all assets from two URLs and compare the found asset URLs as well as their actual content.
#! /usr/bin/env python
import re
import sys
import difflib
import requests
from purl import URL
# Matches href="..." / src="..." attributes whose value ends in ".css" or
# ".js".  The named groups are what get_asset_dict() reads:
#   domain - optional "scheme://host" or protocol-relative "//host" prefix
#   link   - the remainder of the attribute value (the asset path)
# Both character classes exclude '"' so a match can never run past the end of
# the attribute value, and the extension is mandatory so values that merely
# end in "." are not mistaken for assets.
URL_REGEX = re.compile(
    r'(href|src)="(?P<domain>((http|ftp)s?:)?//[^/"]*)?'
    r'(?P<link>[^"]*?\.(css|js))"')


def get_asset_dict(content, url):
    """Map every css/js asset path referenced in *content* to a full URL.

    Args:
        content: HTML text to scan for ``href``/``src`` attributes.
        url: The page the content came from. Only consulted when a link has
            no host of its own, in which case its ``scheme()`` and
            ``netloc()`` methods supply the prefix (the script passes a
            ``purl.URL``).

    Returns:
        A dict keyed by the asset path exactly as captured from the
        attribute (without any host prefix), whose values are the
        corresponding absolute URLs. If the same path occurs more than once,
        the last occurrence wins.
    """
    asset_dict = {}
    for match in URL_REGEX.finditer(content):
        domain = match.group('domain')
        path = match.group('link')
        if not domain:
            # Link carries no host: fall back to the page's own origin.
            # NOTE(review): a path without a leading '/' is appended directly
            # to the host with no separator -- confirm whether such relative
            # links need to be resolved against the page path instead.
            domain = "{}://{}".format(url.scheme(), url.netloc())
        full_url = "{}{}".format(domain, path)
        if full_url.startswith('//'):
            # Protocol-relative link: pick a concrete scheme so it can be
            # fetched.
            full_url = 'http:' + full_url
        asset_dict[path] = full_url
    return asset_dict
# Script body: fetch two pages, compare which css/js assets each one
# references, then diff the content of every asset that both pages share.
# Usage: <script> LOCAL_URL REMOTE_URL
if len(sys.argv) < 3:
    # Fail with a usage hint instead of a bare IndexError.
    sys.exit("Usage: {} LOCAL_URL REMOTE_URL".format(sys.argv[0]))

local_url = URL(sys.argv[1])
remote_url = URL(sys.argv[2])

print("Get local content from {}".format(local_url))
local_content = requests.get(local_url.as_string()).content

print("Get remote content from {}".format(remote_url))
remote_content = requests.get(remote_url.as_string()).content

local_assets = get_asset_dict(local_content, local_url)
remote_assets = get_asset_dict(remote_content, remote_url)

separator = "-" * 50

print(separator)
print("Checking assets for changes")
print(separator)

# Sort the paths so the comparison does not depend on dict ordering, and use
# lineterm='' because the entries carry no trailing newlines of their own.
difference = list(difflib.context_diff(
    sorted(local_assets), sorted(remote_assets),
    fromfile='local', tofile='remote', lineterm=''))

if difference:
    print("\n".join(difference))
else:
    print("No difference in included js/css files")
print(separator)

# Only assets referenced by both pages can be compared; anything present on
# one side only has already been reported by the diff above.
for path in sorted(set(local_assets) & set(remote_assets)):
    print(separator)
    print("Diffing the file '{}' on both sites:".format(path))

    print("downloading {}".format(remote_assets[path]))
    remote_asset = requests.get(remote_assets[path]).content.split('\n')

    print("downloading {}".format(local_assets[path]))
    local_asset = requests.get(local_assets[path]).content.split('\n')

    # context_diff() returns a generator, which is always truthy; build a
    # list so an empty diff can actually be detected.
    result = list(difflib.context_diff(
        remote_asset, local_asset,
        fromfile=remote_assets[path], tofile=local_assets[path],
        lineterm=''))

    if not result:
        print("NO DIFF for '{}'".format(path))
    else:
        print("\n".join(result))
    print(separator)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment