Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Search landing page of UK HE library website for evidence of Google Analytics tracking code
import simplejson,urllib,re,csv
# Check the landing page of each library website listed in a JSON feed for
# the Google Analytics "ga.js" loader, write one CSV row per page that could
# be fetched, and print summary counts at the end.
#
# NOTE(review): the feed is served by Yahoo Pipes, which is believed to have
# been discontinued -- confirm that this URL still resolves before relying on
# the script.
url = "http://pipes.yahoo.com/pipes/pipe.run?_id=f257abdf5a1b6486afa56ad1ed63af8c&_render=json"

# Dots are escaped so they match a literal "." rather than any character.
# Only the ga.js loader URL is detected; other tracking snippets are not.
regex = re.compile(r'google-analytics\.com/ga\.js')


def _percentage(count, total):
    """Return count as a percentage of total (0.0 when total is zero)."""
    return 100.0 * count / total if total else 0.0


totcount = 0   # feed items that carry a 'library_website' entry
gacount = 0    # pages where the ga.js reference was found
ngacount = 0   # pages fetched successfully but without the reference
broken = 0     # pages that could not be fetched

# "wb" is the mode the Python 2 csv module expects; the with-block makes sure
# the file is flushed and closed even if the run is interrupted.
with open("HElibGoogalytics.csv", "wb") as csvfile:
    writer = csv.writer(csvfile)

    feed = urllib.urlopen(url)
    try:
        data = simplejson.load(feed)
    finally:
        feed.close()

    for item in data['value']['items']:
        if 'library_website' not in item:
            continue
        site = item['library_website']
        totcount += 1

        # Only the fetch is best-effort: a site that cannot be loaded is
        # counted and skipped.  `except Exception` (instead of a bare except)
        # lets Ctrl-C / SystemExit stop the run, and keeping the reporting
        # code outside the try means an output error is no longer miscounted
        # as a page that failed to load.
        try:
            page = urllib.urlopen(site)
            try:
                content = page.read()
            finally:
                page.close()
        except Exception:
            broken += 1
            continue

        if regex.search(content):
            status = "has googalytics"
            gacount += 1
        else:
            status = "doesn't have googalytics"
            ngacount += 1

        # Single-argument print(...) gives the same output as the Python 2
        # print statement and also parses under Python 3.
        print("%s %s" % (site, status))
        writer.writerow([site, status])

# Percentages are real percentages (0-100) of the sites checked; with no
# sites checked they are reported as 0.0 instead of dividing by zero.
print("Total number of websites checked %d" % totcount)
print("Number with Google Analytics code detected %d Percentage: %.1f"
      % (gacount, _percentage(gacount, totcount)))
print("Number without Google Analytics code detected %d Percentage: %.1f"
      % (ngacount, _percentage(ngacount, totcount)))
print("Number of pages failed to load %d Percentage: %.1f"
      % (broken, _percentage(broken, totcount)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment