Created
November 25, 2009 17:33
-
-
Save arantius/242890 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import fnmatch | |
import operator | |
import re | |
import string | |
import sys | |
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
# Greasemonkey API identifiers whose usage is tallied across all scripts.
apis = (
    'GM_addStyle', 'GM_deleteValue', 'GM_getResourceText',
    'GM_getResourceURL', 'GM_getValue', 'GM_listValues', 'GM_log',
    'GM_openInTab', 'GM_registerMenuCommand', 'GM_setValue',
    'GM_xmlhttpRequest', 'unsafeWindow', 'GM_info',
)

# Number of scripts per API name, plus three bookkeeping keys:
# 'all' (every script seen), 'none' (scripts using no listed API) and
# 'eval' (scripts mentioning eval).
api_counts = {'all': 0, 'none': 0, 'eval': 0}
api_counts.update((name, 0) for name in apis)

# Number of scripts using each "@..." metadata key.
meta_counts = {}

# Histograms keyed by a per-script domain count (an int, or one of the
# strings 'infinity' / 'unknown'), for scripts using GM_xmlhttpRequest,
# GM_setValue and GM_getValue respectively.
xhr_hosts = {}
set_hosts = {}
get_hosts = {}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
def hostToDomain(host):
    """Reduce a host name to a shorter trailing "domain" when possible.

    Heuristic: return the shortest suffix of `host` that directly follows
    a dot and is at least seven characters long.  If no such suffix
    exists, `host` is returned unchanged.
    """
    found = re.search(r'.*\.(.+......)$', host)
    return found.group(1) if found else host
def domainsInMetadata(metadata):
    """Collect the domains named by a script's @include / @match rules.

    Returns:
      - an empty set when `metadata` is empty or None;
      - the string 'infinity' when there is no @include/@match rule at
        all (which means "@include *"), or when any rule's host part is
        exactly '*';
      - otherwise the set of hostToDomain() results for each rule's host.
    """
    def urlToHost(url):
        # First try a reasonable "scheme://host/..." URL, then fall back
        # to stranger patterns such as "*.amazon.*/*" by taking
        # everything up to the first slash.
        for pattern in (r'^\w+.*?://([^/]+)', r'^([^/]+)'):
            found = re.search(pattern, url)
            if found:
                return found.group(1)
        return url

    if not metadata:
        return set([])

    rules = re.findall(r'@(?:include|match)\s+(.*)', metadata)
    if not rules:
        # No @include means "@include *" !
        return 'infinity'

    hosts = set(urlToHost(rule.strip()) for rule in rules)
    if '*' in hosts:
        return 'infinity'
    return set(hostToDomain(host) for host in hosts)
def numDomainsInMetadata(metadata):
    """Count the distinct domains a script's metadata applies to.

    Returns the string 'infinity' when domainsInMetadata() reports
    'infinity'; otherwise the size of the set it returns.
    """
    domains = domainsInMetadata(metadata)
    if 'infinity' == domains:
        return 'infinity'
    # Reuse the result computed above instead of parsing the metadata a
    # second time.
    return len(domains)
def domainsInXhr(source):
    """Return the set of domains of literal URLs passed to GM_xmlhttpRequest.

    Looks for "GM_xmlhttpRequest", then "url", then an http(s) URL, with
    no closing brace in between, and maps each captured host through
    hostToDomain().
    """
    pattern = re.compile(
        r"""GM_xmlhttpRequest[^}]+url[^}]+https?://([^'"/]+)""", re.S)
    return set(hostToDomain(host) for host in pattern.findall(source))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
# Scan every script file named on stdin (one path per line) and update the
# module-level tallies: api_counts, meta_counts, set_hosts, get_hosts and
# xhr_hosts.
for filename in sys.stdin:
    filename = filename.strip()
    if not filename:
        # Tolerate blank lines in the file list instead of failing to open ''.
        continue

    api_counts['all'] += 1

    # open() in a with-block: file() exists only in Python 2 and the handle
    # was never closed.
    with open(filename) as script_file:
        source = script_file.read()

    # Strip strings: first escaped quotes, then quoted runs on one line.
    source_clean = re.sub("""\\\\['"]""", '', source)
    source_clean = re.sub("""(['"]).*?\\1""", '', source_clean)
    # Strip comments.  re.S must be passed as `flags`; given positionally it
    # is taken as `count`, and multi-line /* ... */ comments survive.
    source_clean = re.sub(r'/\*.*?\*/', '', source_clean, flags=re.S)
    source_clean = re.sub('//.*\r?\n?', '', source_clean)

    # Record which APIs this script mentions outside strings and comments.
    this_apis = {}
    for api in apis:
        used = api in source_clean
        this_apis[api] = used
        if used:
            api_counts[api] += 1

    if re.search(r'\beval\b', source_clean):
        api_counts['eval'] += 1
    if not any(this_apis.values()):
        api_counts['none'] += 1

    # Extract the metadata block; a script without one is treated as having
    # empty metadata so everything below has a well-defined input.
    metadata_match = re.search(
        r'// ==UserScript==(.*?)// ==/UserScript==', source, re.S)
    metadata = metadata_match.group(1) if metadata_match else ''
    for meta in set(re.findall(r'// (@[^\s]+)', metadata)):
        meta_counts[meta] = meta_counts.get(meta, 0) + 1

    numDomains = numDomainsInMetadata(metadata)
    if this_apis['GM_setValue']:
        set_hosts[numDomains] = set_hosts.get(numDomains, 0) + 1
    if this_apis['GM_getValue']:
        get_hosts[numDomains] = get_hosts.get(numDomains, 0) + 1

    if this_apis['GM_xmlhttpRequest']:
        metaDomains = domainsInMetadata(metadata)
        if 'infinity' == metaDomains:
            numDomains = 'infinity'
        else:
            xhrDomains = domainsInXhr(source)
            if not xhrDomains:
                numDomains = 'unknown'
            else:
                # If the XHR domain matches an include glob, it is not a
                # distinct cross-origin domain, so drop it.
                xhrDomains = set(
                    xd for xd in xhrDomains
                    if not any(fnmatch.fnmatch(xd, md) for md in metaDomains))
                numDomains = len(metaDomains | xhrDomains)
                if numDomains == 2 and xhrDomains == set(['userscripts.org']):
                    # Ignore scripts that just call uso for update checks;
                    # they should be auto-updating now.  The global tally is
                    # what must shrink: decrementing the per-file this_apis
                    # flag right before `continue` had no effect.
                    api_counts['GM_xmlhttpRequest'] -= 1
                    continue
        xhr_hosts[numDomains] = xhr_hosts.get(numDomains, 0) + 1
# Print the report: each table lists its entries by descending count.
# Single-argument print(...) calls behave the same on Python 2 and 3.
print("")
print("%10s %s" % ('Number', 'API'))
for api, count in sorted(
        api_counts.items(), key=operator.itemgetter(1), reverse=True):
    print("%10d %s" % (count, api))

print("")
print("%10s %s" % ('Number', 'Imperative'))
for meta, count in sorted(
        meta_counts.items(), key=operator.itemgetter(1), reverse=True):
    print("%10d %s" % (count, meta))

print("")
print("%10s %s" % ('Number', 'Distinct hosts (XHR)'))
print("%10d %s" % (api_counts['GM_xmlhttpRequest'], 'all scripts'))
for hosts, count in sorted(
        xhr_hosts.items(), key=operator.itemgetter(1), reverse=True):
    print("%10d %s" % (count, hosts))

print("%10s %s" % ('Number', 'Distinct hosts (set)'))
print("%10d %s" % (api_counts['GM_setValue'], 'all scripts'))
for hosts, count in sorted(
        set_hosts.items(), key=operator.itemgetter(1), reverse=True):
    print("%10d %s" % (count, hosts))

print("%10s %s" % ('Number', 'Distinct hosts (get)'))
# The "get" table's total is the GM_getValue count; it previously repeated
# the GM_setValue total.
print("%10d %s" % (api_counts['GM_getValue'], 'all scripts'))
for hosts, count in sorted(
        get_hosts.items(), key=operator.itemgetter(1), reverse=True):
    print("%10d %s" % (count, hosts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment