Created
February 10, 2011 17:49
-
-
Save psychemedia/820965 to your computer and use it in GitHub Desktop.
Search landing page of UK HE library website for evidence of Google Analytics tracking code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search the landing page of each UK HE library website for evidence of the
# Google Analytics tracking code. Each site's verdict is printed and appended
# to a CSV file; summary counts are printed at the end.
#
# The script runs unchanged on Python 2 (its original target) and Python 3.
import csv
import re
import sys
import urllib
from contextlib import closing

try:
    import simplejson
except ImportError:
    # Fall back to the standard-library json module, which offers the same
    # load() call used below.
    import json as simplejson

try:
    urlopen = urllib.urlopen  # Python 2 location
except AttributeError:
    import urllib.request  # Python 3 moved urlopen into urllib.request
    urlopen = urllib.request.urlopen

# JSON feed listing the institutions; items may carry a 'library_website' key.
PIPE_URL = "http://pipes.yahoo.com/pipes/pipe.run?_id=f257abdf5a1b6486afa56ad1ed63af8c&_render=json"
CSV_PATH = "HElibGoogalytics.csv"

# Bytes pattern because page bodies arrive as raw bytes; the dots are escaped
# so they only match a literal '.'.
GA_PATTERN = re.compile(br"google-analytics\.com/ga\.js")


def has_google_analytics(content):
    """Return True if *content* references the ga.js tracking script.

    *content* is normally the raw bytes of a page; text is encoded to UTF-8
    first so either form can be searched with the same pattern.
    """
    if not isinstance(content, bytes):
        content = content.encode("utf-8")
    return GA_PATTERN.search(content) is not None


def fetch_page(address):
    """Download *address* and return the response body, closing the connection."""
    with closing(urlopen(address)) as response:
        return response.read()


def percentage(count, total):
    """Return *count* as a percentage of *total*, or 0.0 when *total* is zero."""
    if not total:
        return 0.0
    return 100.0 * count / total


def open_csv(path):
    """Open *path* for writing in the mode the csv module expects."""
    if sys.version_info[0] < 3:
        return open(path, "wb")
    # Python 3's csv module wants text mode with newline translation disabled.
    return open(path, "w", newline="")


def check_sites(items, writer, fetch=fetch_page):
    """Check every item that has a 'library_website' entry.

    Args:
        items: iterable of dicts; entries without 'library_website' are skipped.
        writer: object with a ``writerow(list)`` method (e.g. ``csv.writer``)
            that receives one ``[url, verdict]`` row per page that loaded.
        fetch: callable taking a URL and returning the page body.

    Returns:
        A ``(total, with_ga, without_ga, broken)`` tuple of counts.
    """
    total = with_ga = without_ga = broken = 0
    for item in items:
        if 'library_website' not in item:
            continue
        site = item['library_website']
        total += 1
        try:
            found = has_google_analytics(fetch(site))
        except Exception as err:
            # Best effort: one unreachable or malformed site must not abort
            # the run. Exception (not a bare except) lets Ctrl-C through.
            broken += 1
            sys.stderr.write("%s failed to load: %s\n" % (site, err))
            continue
        if found:
            verdict = "has googalytics"
            with_ga += 1
        else:
            verdict = "doesn't have googalytics"
            without_ga += 1
        print("%s %s" % (site, verdict))
        writer.writerow([site, verdict])
    return total, with_ga, without_ga, broken


def summary_lines(total, with_ga, without_ga, broken):
    """Return the four summary lines; percentages are real percentages (0-100)."""
    return [
        "Total number of websites checked %d" % total,
        "Number with Google Analytics code detected %d Percentage: %.1f"
        % (with_ga, percentage(with_ga, total)),
        "Number without Google Analytics code detected %d Percentage: %.1f"
        % (without_ga, percentage(without_ga, total)),
        "Number of pages failed to load %d Percentage: %.1f"
        % (broken, percentage(broken, total)),
    ]


def main():
    """Fetch the institution list, check each site, and print the summary."""
    with closing(urlopen(PIPE_URL)) as response:
        data = simplejson.load(response)
    with open_csv(CSV_PATH) as handle:
        counts = check_sites(data['value']['items'], csv.writer(handle))
    for line in summary_lines(*counts):
        print(line)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment