Skip to content

Instantly share code, notes, and snippets.

@Fingel
Created November 13, 2013 02:41
Show Gist options
  • Save Fingel/7442752 to your computer and use it in GitHub Desktop.
Save Fingel/7442752 to your computer and use it in GitHub Desktop.
Check the Alexa top sites to see which of them use HTTPS
import urllib, urllib2, subprocess, re, csv, pickle
from collections import OrderedDict
# Module-level state shared by the rest of the script.
urls = []              # site names to probe, in the order they appear in the CSV
usessl = 0             # running count of sites detected as redirecting to https
sites = OrderedDict()  # site name -> True/False result, kept in probe order
CHECK = 1000           # maximum number of CSV rows to take from the top of the file

# Binary mode is what the Python 2 csv module expects for its input file.
# The with-statement closes the file, so no explicit close() is needed.
with open('top-1m.csv', 'rb') as csvfile:
    for row in csv.reader(csvfile):
        if len(urls) >= CHECK:
            break
        # The second column is used as the site name (the first is presumably
        # the rank -- confirm against the CSV).  Blank or short rows are
        # skipped instead of raising IndexError.
        if len(row) > 1:
            urls.append(row[1])
# Browser-like User-Agent sent with every curl request.
_USER_AGENT = 'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)'

# Compiled once, outside the loop.
# Matches a "Location: https..." response header (case-insensitive).
_HEADER_HTTPS_REDIRECT = re.compile(r'location: https', re.IGNORECASE)
# Matches either a <meta http-equiv="refresh" content="N;URL=https:..."> tag
# or a JavaScript window.location assignment that mentions https.
# The original pattern had `content=(\'|)"`, a transposition of `(\'|")`,
# which meant single-quoted content attributes could never match.
_BODY_HTTPS_REDIRECT = re.compile(
    r'((?=.*<meta)(?=.*http-equiv=(\'|")refresh(\'|"))'
    r'(?=.*content=(\'|")\d*;?URL=(\'|")*https:))'
    r'|(window\.location.+(\'|")+https.+(\'|"))',
    re.IGNORECASE)


def _curl(flags, url):
    """Run curl against *url* with the given short *flags*; return its stdout.

    Every request follows redirects (-L, part of *flags*), is capped at
    3 seconds (-m 3) and sends the browser-like User-Agent (-A).
    subprocess.check_output raises CalledProcessError when curl exits
    non-zero.
    """
    return subprocess.check_output(
        ['curl', flags, '-m', '3', '-A', _USER_AGENT, url])


# Probe each site: first look for an https Location header, then fall back to
# scanning the page body for a meta-refresh / JavaScript redirect to https.
for url in urls:
    try:
        if _HEADER_HTTPS_REDIRECT.search(_curl('-LI', url)):
            print('%s: https' % url)
            usessl += 1
            sites[url] = True
        elif _BODY_HTTPS_REDIRECT.search(_curl('-Lv', url)):
            print('%s: meta refresh' % url)
            usessl += 1
            sites[url] = True
        else:
            print('%s: bad news bears' % url)
            sites[url] = False
    except (subprocess.CalledProcessError, OSError) as err:
        # curl failed (non-zero exit) or could not be started: count the site
        # as not using https, but say so.  Unlike the former bare `except:`,
        # this lets Ctrl-C (KeyboardInterrupt) stop the scan.
        print('%s: request failed (%s)' % (url, err))
        sites[url] = False
# Summary line on stdout.
print("%s out of %s websites use ssl" % (usessl, len(urls)))

# Persist the raw results; the with-statement closes the file handle, which
# the original left open.
with open('sites.p', 'wb') as pickle_file:
    pickle.dump(sites, pickle_file)

# Emit one HTML table row per site: row number, site name, Yes/No cell.
# The original ended with `f.close` (no parentheses), which never called
# close(); the with-statement closes and flushes the file.
with open('table.html', 'w') as table:
    for rank, site in enumerate(sites, 1):
        if sites[site]:
            css_class, label = 'yes', 'Yes'
        else:
            css_class, label = 'no', 'No'
        table.write(
            "<tr><td>%s</td><td>%s</td><td class=\"%s\">%s</td></tr>\n"
            % (rank, site, css_class, label))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment