Skip to content

Instantly share code, notes, and snippets.

@arantius
Created November 25, 2009 17:33
Show Gist options
  • Save arantius/242890 to your computer and use it in GitHub Desktop.
Save arantius/242890 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import fnmatch
import operator
import re
import string
import sys
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Greasemonkey API identifiers whose usage is tallied across all scripts.
apis = (
    'GM_addStyle',
    'GM_deleteValue',
    'GM_getResourceText',
    'GM_getResourceURL',
    'GM_getValue',
    'GM_listValues',
    'GM_log',
    'GM_openInTab',
    'GM_registerMenuCommand',
    'GM_setValue',
    'GM_xmlhttpRequest',
    'unsafeWindow',
    'GM_info',
)

# Script counts keyed by API name, plus three synthetic buckets:
#   'all'  - every script scanned,
#   'none' - scripts using none of the APIs above,
#   'eval' - scripts that mention eval.
api_counts = dict.fromkeys(('all', 'none', 'eval'), 0)
for api in apis:
    api_counts.setdefault(api, 0)

# Number of scripts declaring each metadata imperative (e.g. "@include").
meta_counts = {}

# Histograms keyed by a script's distinct-domain count (an int, 'infinity'
# or 'unknown'), kept separately for scripts using GM_xmlhttpRequest,
# GM_setValue and GM_getValue.
xhr_hosts, set_hosts, get_hosts = {}, {}, {}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def hostToDomain(host):
    """Reduce a host name to a rough "site" domain for grouping.

    Heuristic: return whatever follows the right-most dot that still has
    at least seven characters after it (so "www.google.com" becomes
    "google.com" and "maps.google.co.uk" becomes "google.co.uk").  When no
    dot qualifies, the host is returned unchanged.
    """
    found = re.search(r'.*\.(.+......)$', host)
    return found.group(1) if found else host
def domainsInMetadata(metadata):
    """Return the domains a script's metadata block says it runs on.

    Args:
        metadata: Text found between the ==UserScript== markers, or a
            falsy value when the script has no metadata block.

    Returns:
        * an empty set when ``metadata`` is falsy;
        * the string 'infinity' when there is no @include/@match rule at
          all, or when any rule's host part is the bare wildcard "*";
        * otherwise a set of domains, one per distinct host, as reduced
          by hostToDomain().
    """
    if not metadata:
        return set()

    includes = re.findall(r'@(?:include|match)\s+(.*)', metadata)
    if not includes:
        # No @include means "@include *" !
        return 'infinity'

    def urlToHost(url):
        # Try to parse a reasonable URL.  The scheme may itself contain a
        # wildcard ("http*://", "*://"), so "*" is accepted there; without
        # that, "*://*/*" was parsed as host "*:" and a match-everything
        # script was counted as a single domain.  The scheme is limited to
        # scheme-like characters so a "://" later in the path or query
        # string is never mistaken for the scheme separator.
        match = re.search(r'^[\w*+.-]+://([^/]+)', url)
        if match:
            return match.group(1)
        # Try to parse a stranger pattern like "*.amazon.*/*".
        match = re.search(r'^([^/]+)', url)
        if match:
            return match.group(1)
        # Nothing host-like (e.g. the rule starts with "/"): keep as is.
        return url

    hosts = set(urlToHost(rule.strip()) for rule in includes)
    if '*' in hosts:
        # A wildcard host matches every site.
        return 'infinity'
    return set(hostToDomain(host) for host in hosts)
def numDomainsInMetadata(metadata):
    """Return how many distinct domains a metadata block targets.

    Passes through the string 'infinity' when domainsInMetadata() reports
    that the script can run everywhere; otherwise returns the size of the
    domain set (0 when there is no metadata).
    """
    domains = domainsInMetadata(metadata)
    if domains == 'infinity':
        return 'infinity'
    # Reuse the set computed above instead of parsing the metadata twice.
    return len(domains)
def domainsInXhr(source):
    """Return the set of domains a script appears to request via XHR.

    Scans the raw source for "GM_xmlhttpRequest", then "url", then an
    http(s) URL, with no "}" in between, and captures that URL's host
    (everything up to the next quote or slash).  Each host is reduced
    with hostToDomain().  Returns an empty set when nothing matches.
    """
    pattern = re.compile(
        r"""GM_xmlhttpRequest[^}]+url[^}]+https?://([^'"/]+)""", re.S)
    return set(hostToDomain(found) for found in pattern.findall(source))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Main scan: stdin supplies one user-script path per line.  For each script
# this updates the module-level tallies:
#   api_counts  - which Greasemonkey APIs (and eval) the code mentions,
#   meta_counts - which metadata imperatives the header declares,
#   set_hosts / get_hosts / xhr_hosts - histograms of how many distinct
#       domains are involved for scripts using GM_setValue / GM_getValue /
#       GM_xmlhttpRequest.
for filename in sys.stdin:
    filename = filename.strip()
    if not filename:
        # Ignore blank lines rather than trying to open '' as a file.
        continue
    api_counts['all'] += 1

    # Close the handle promptly; many files may be scanned in one run.
    with open(filename) as handle:
        source = handle.read()

    # Strip strings (escaped quotes first, so they do not end a literal).
    source_clean = re.sub("""\\\\['"]""", '', source)
    source_clean = re.sub("""(['"]).*?\\1""", '', source_clean)
    # Strip comments.  re.S must be given as a *flag*: passed as the fourth
    # positional argument of re.sub it is taken as `count`, which left
    # multi-line /* ... */ comments in place and capped removals at 16.
    source_clean = re.compile(r'/\*.*?\*/', re.S).sub('', source_clean)
    source_clean = re.sub('//.*\r?\n?', '', source_clean)

    # Record which APIs are mentioned in the cleaned source.
    this_apis = {}
    for api in apis:
        used = api in source_clean
        this_apis[api] = used
        if used:
            api_counts[api] += 1
    if re.search(r'\beval\b', source_clean):
        api_counts['eval'] += 1
    if not any(this_apis.values()):
        api_counts['none'] += 1

    # Pull out the metadata block.  A missing or empty block becomes ''
    # so the helpers below always receive text, never a match object.
    header = re.search(
        r'// ==UserScript==(.*?)// ==/UserScript==', source, re.S)
    metadata = header.group(1) if header else ''
    if metadata:
        # Count each imperative once per script.
        for meta in set(re.findall(r'// (@[^\s]+)', metadata)):
            meta_counts[meta] = meta_counts.get(meta, 0) + 1

    numDomains = numDomainsInMetadata(metadata)
    if this_apis['GM_setValue']:
        set_hosts[numDomains] = set_hosts.get(numDomains, 0) + 1
    if this_apis['GM_getValue']:
        get_hosts[numDomains] = get_hosts.get(numDomains, 0) + 1

    if this_apis['GM_xmlhttpRequest']:
        metaDomains = domainsInMetadata(metadata)
        if metaDomains == 'infinity':
            numDomains = 'infinity'
        else:
            xhrDomains = domainsInXhr(source)
            if not xhrDomains:
                numDomains = 'unknown'
            else:
                # If the XHR domain matches an include glob, it is not
                # a distinct cross-origin domain, remove it.
                xhrDomains = set(
                    xd for xd in xhrDomains
                    if not any(fnmatch.fnmatch(xd, md) for md in metaDomains))
                numDomains = len(metaDomains | xhrDomains)
                if numDomains == 2 and xhrDomains == set(['userscripts.org']):
                    # Ignore scripts that just call uso for update checks;
                    # they should be auto-updating now.  Take the script
                    # back out of the XHR total (the original decremented
                    # the per-script flag, which had no effect) and leave
                    # it out of the histogram.
                    api_counts['GM_xmlhttpRequest'] -= 1
                    continue
        xhr_hosts[numDomains] = xhr_hosts.get(numDomains, 0) + 1
def _print_table(heading, counts, total=None):
    """Print one report table, rows ordered by descending count.

    heading: text for the right-hand column header.
    counts:  dict mapping a row label to its count.
    total:   optional figure printed first, labelled 'all scripts'.
    """
    print("%10s %s" % ('Number', heading))
    if total is not None:
        print("%10d %s" % (total, 'all scripts'))
    rows = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    for label, count in rows:
        print("%10d %s" % (count, label))


# Final report.  Each print call takes a single parenthesised argument, so
# the output is the same whether print is a statement or a function.
print("")
_print_table('API', api_counts)
print("")
_print_table('Imperative', meta_counts)
print("")
_print_table('Distinct hosts (XHR)', xhr_hosts,
             api_counts['GM_xmlhttpRequest'])
_print_table('Distinct hosts (set)', set_hosts, api_counts['GM_setValue'])
# The "get" table must be totalled with the GM_getValue count; it used to
# repeat the GM_setValue figure.
_print_table('Distinct hosts (get)', get_hosts, api_counts['GM_getValue'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment