Skip to content

Instantly share code, notes, and snippets.

@pingjiang
Created July 20, 2014 23:02
Show Gist options
  • Save pingjiang/7d97afeb53ff56def2a2 to your computer and use it in GitHub Desktop.
Save pingjiang/7d97afeb53ff56def2a2 to your computer and use it in GitHub Desktop.
抓取SAE支持CDN库的列表
#!/usr/bin/env python
#--*-- coding: UTF-8 --*--
import urllib
import re
import sys
# Library names iterated by print_markdown() when it builds its URL listing.
sae_cdn_libs = [
    'angular.js', 'backbone', 'bootstrap', 'dojo', 'ext-core', 'highcharts',
    'highstock', 'jq.mobi', 'jquery', 'jquery-mobile', 'jquery-ui',
    'jquery.cookie', 'jquery.migrate', 'jquerytools', 'json2', 'lesscss',
    'mootools', 'prototype', 'qunit', 'scriptaculous', 'swfobject',
    'underscore', 'webfont', 'wlige', 'yui', 'zepto',
]

# File name -> explicit URL override.  Not referenced by any code in view.
lib_fix = {
    'jquery.cookie.js': 'http://lib.sinaapp.com/js/jquery.cookie/jquery.cookie.js',
}

# Captures the value of every double-quoted href attribute in a page.
RE_HREF = re.compile('href="([^"]*)"')
BASE_URL = 'http://lib.sinaapp.com'
# Links starting with this prefix are the ones website_walker() recurses into.
URL_PREFIX = '/?path='

cache = {}    # page url -> list of href values extracted from that page
results = []  # asset links recorded by website_walker(), in discovery order
result_file = 'results.txt'

# Private crawl bookkeeping used by website_walker().
_explored_depth = {}  # page url -> largest max_depth it has been walked with
_recorded = set()     # asset links already appended to `results`


def _fetch_links(url):
    """Download *url* and return the list of href values found in its body."""
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    response = urlopen(url)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
    if not isinstance(html, str):
        # Python 3 hands back bytes, but RE_HREF is a text pattern.
        html = html.decode('utf-8', 'replace')
    return RE_HREF.findall(html)


def website_walker(f, url, pattern, max_depth=10):
    """Recursively walk listing pages and record every .js/.css asset link.

    Each link on the page at *url* is handled as follows:

    * links ending in '/docs', 'themes' or 'i18n' are ignored;
    * links that do not start with URL_PREFIX are never followed; those that
      start with '/js/' and end in '.js' or '.css' are appended to the
      module-level ``results`` list and written to *f*, one per line;
    * a link equal to URL_PREFIX itself is ignored;
    * any other URL_PREFIX link is walked with ``max_depth - 1``.

    A page is walked again only when it is reached with a larger remaining
    depth than on any earlier visit, and each asset link is recorded once.
    Without this, pages that link to each other would be re-walked at every
    depth level and would flood ``results`` and *f* with duplicates.

    Args:
        f: Object with a ``write(str)`` method that receives the asset links.
        url: Absolute URL of the page to walk.
        pattern: Unused; kept so existing callers keep working.
        max_depth: Remaining recursion budget; nothing happens when <= 0.

    Returns:
        None.  A fetch failure (IOError) is printed and the page is skipped;
        it is not cached, so a later visit tries the download again.
    """
    if max_depth <= 0:
        return
    if _explored_depth.get(url, 0) >= max_depth:
        # Already walked with at least this much budget: nothing new to find.
        return

    links = cache.get(url)
    if links is None:
        try:
            links = _fetch_links(url)
        except IOError as e:
            print('error fetching %s: %s' % (url, e))
            return
        cache[url] = links
    # Mark only after the links are available so failed pages stay retryable.
    _explored_depth[url] = max_depth

    for link in links:
        if link.endswith(('/docs', 'themes', 'i18n')):
            continue
        if not link.startswith(URL_PREFIX):
            is_asset = link.startswith('/js/') and link.endswith(('.js', '.css'))
            if is_asset and link not in _recorded:
                _recorded.add(link)
                results.append(link)
                f.write(link + '\n')
            continue
        if link == URL_PREFIX:
            continue
        website_walker(f, BASE_URL + link, link, max_depth - 1)
def print_markdown(libs=None):
    """Print a Markdown section of templated CDN URLs for each library.

    For every library name a '### <name>' heading is printed, followed by
    four URLs (.css, .js, .min.css, .min.js) built from a fixed
    'http://lib.sinaapp.com/js/<name>/latest/...' template.  The URLs are
    only formatted; nothing here checks that they exist.

    Args:
        libs: Iterable of library names.  Defaults to the module-level
            ``sae_cdn_libs`` list when omitted or None.
    """
    if libs is None:
        libs = sae_cdn_libs
    for lib in libs:
        lib_css_url = 'http://lib.sinaapp.com/js/%s/latest/css/%s' % (lib, lib)
        lib_js_url = 'http://lib.sinaapp.com/js/%s/latest/js/%s' % (lib, lib)
        # Single parenthesised argument: prints the same on Python 2 and 3.
        print('''### %s
%s.css
%s.js
%s.min.css
%s.min.js
''' % (lib, lib_css_url, lib_js_url, lib_css_url, lib_js_url))
def main():
    """Crawl the site, save the asset links, and print them per library.

    Walks from BASE_URL with website_walker(), which writes every recorded
    link to ``result_file``.  The links collected in ``results`` are then
    grouped by their third '/'-separated component and printed as one
    '### <name>' heading per group, followed by the tab-indented absolute
    URLs.  Groups are printed in sorted order so the output is deterministic.
    """
    with open(result_file, 'w') as f:
        website_walker(f, BASE_URL, URL_PREFIX)

    lib_map = {}
    for result in results:
        parts = result.split('/')
        # '/js/<name>/...' splits into ['', 'js', '<name>', ...].
        if len(parts) > 2:
            lib_map.setdefault(parts[2], []).append(result)

    for key in sorted(lib_map):
        print('### ' + key)
        print('\t' + BASE_URL + ('\n\t' + BASE_URL).join(lib_map[key]))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment