Skip to content

Instantly share code, notes, and snippets.

@int64ago
Created January 12, 2015 06:14
Show Gist options
  • Save int64ago/dcd02e166458b02dc613 to your computer and use it in GitHub Desktop.
Save int64ago/dcd02e166458b02dc613 to your computer and use it in GitHub Desktop.
Get data from cdnjs.com and save it as JSON
#!/usr/bin/python2
import urllib, json
from BeautifulSoup import BeautifulSoup
# Scrape cdnjs.com: for every library listed on the index page, collect its
# version names and the file paths of its first listed version, then dump the
# result as JSON.
#
# Resulting shape: {lib_name: {'v': [version, ...], 'i': [file path, ...]}}
data = {}
# Libraries to skip. The original note here reads "inconformity"; presumably
# their pages do not match the layout parsed below -- TODO confirm.
blacklist = ['mathjax', 'yui', 'documentup']


def _fetch(url):
    """Return the response body for *url*, closing the handle afterwards."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


index_soup = BeautifulSoup(_fetch('https://cdnjs.com/'))
libs = index_soup.findAll('tr', attrs={'data-library-name': True})
for lib in libs:
    lib_name = lib['data-library-name']
    if lib_name in blacklist:
        continue
    lib_html = _fetch('https://cdnjs.com/libraries/' + lib_name)
    lib_soup = BeautifulSoup(lib_html)
    vers = lib_soup.findAll('div', attrs={'data-library-version': True})
    if not vers:
        # No version blocks on this page: indexing vers[0] below would raise
        # IndexError and abort the whole run before anything is written.
        continue
    _vers = [ver['data-library-version'] for ver in vers]
    # Only the files of the first listed version are recorded.
    links = vers[0].findAll('p', attrs={'class': 'library-url'})
    prefix = ('https://cdnjs.cloudflare.com/ajax/libs/'
              + lib_name + '/' + _vers[0] + '/')
    items = []
    for link in links:
        url = link.string
        if url is None:
            # .string is None unless the tag holds exactly one text node.
            continue
        # NOTE(review): as written this replace is a no-op ('/' -> '/').
        # It may originally have decoded an HTML-escaped slash that was lost
        # when the script was copied -- confirm against the live markup.
        url = url.replace('/', '/')
        # Strip the CDN prefix only when it is really there; slicing blindly
        # would cut arbitrary characters off a non-matching URL.
        if url.startswith(prefix):
            url = url[len(prefix):]
        items.append(url)
    data[lib_name] = {'v': _vers, 'i': items}
# Written once, after every library has been processed.
with open('/root/qiniu/data.json', 'w') as out:
    json.dump(data, out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment