Created
June 29, 2017 13:02
-
-
Save miraculixx/be45b7434fbfabef4188267e793eece0 to your computer and use it in GitHub Desktop.
getlicense -- collect license information in your project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getlicenses(dir=None):
    """
    Collect license information found below a directory.

    Walks ``dir`` (default: the current directory) and records one entry for

    * every file whose name starts with ``LICENSE``,
    * every ``*.json`` file holding an object with a ``license`` value,
    * every other file whose name ends in css/js/min/json and whose text
      names one of the known license ids (case-sensitive).

    Each entry is a dict with the keys ``dir``, ``file``, ``lic``,
    ``source``, ``copyright``, ``title``, ``email``, ``author`` and
    ``licfile``.  The entries are written to ``licenses.json`` and a
    human-readable notice is written to ``licenses.md``; both files are
    created in the current working directory.

    Files that cannot be read, JSON files that do not parse and JSON
    documents that are not objects are skipped.

    :param dir: root directory to scan, ``None`` or empty means ``'.'``
    :return: the list of collected entries (same data as ``licenses.json``)
    """
    # Imported locally so the function does not depend on module-level
    # imports being present in the surrounding file.
    import json
    import os
    import re

    root = dir or '.'
    LICENSE_FILES = re.compile(r'LICENSE.*')
    SOURCE_FILES = re.compile(r'(.*css$)|(.*js$)|(.*min$)|(.*json$)')
    # Longer ids come first so that "LGPL" / "GNU GPL" are not reported as
    # plain "GPL"; the look-behind rejects hits inside ordinary words such
    # as "permitted" or "limitation".
    LICENSE_IDS = r'(?<![A-Za-z])(MIT|BSD|LGPL|GNU.GPL|GPL|APACHE)'
    LICENSE_ID_ANYCASE = re.compile(LICENSE_IDS, re.IGNORECASE)
    LICENSE_ID_EXACT = re.compile(LICENSE_IDS)
    SOURCE_LINKS = re.compile(r'https?://\S+')
    # \xa9 is U+00A9, the copyright sign.
    COPYRIGHT = re.compile(
        r'.*(copyright.*|\(c\).[0-9]+.*|\xa9.[0-9]+.*).*', re.IGNORECASE)
    TITLE = r'.*({file}|@package.*|\s\*\s.*)'
    EMAIL = re.compile(r"\w+(?:[-+.']\w+)*@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*")

    def read_lines(path):
        # Undecodable bytes are replaced instead of aborting the whole scan.
        with open(path, 'r', encoding='utf-8', errors='replace') as handle:
            return handle.readlines()

    def first(values, default):
        return values[0] if values else default

    def find_emails(lines):
        hits = (EMAIL.search(line) for line in lines)
        return [hit.group(0) for hit in hits if hit]

    def find_links(lines):
        hits = (SOURCE_LINKS.search(line) for line in lines)
        # Drop punctuation that merely follows the URL in running text.
        return [hit.group(0).rstrip('.,;:)>\'"') for hit in hits if hit]

    def license_name(value):
        # A JSON "license" value may be a string, an object or a list.
        if isinstance(value, dict):
            value = value.get('type') or value.get('name')
        elif isinstance(value, (list, tuple)):
            names = (license_name(item) for item in value)
            value = ', '.join(name for name in names if name)
        return value if isinstance(value, str) else ''

    def scan_license_file(folder, name, lines):
        # Describe a stand-alone LICENSE* file.
        ids = (LICENSE_ID_ANYCASE.search(line) for line in lines)
        lics = [hit.group(1) for hit in ids if hit]
        copyrights = [line.strip() for line in lines if COPYRIGHT.match(line)]
        return dict(dir=folder,
                    file=name,
                    lic=first(lics, ''),
                    source=first(find_links(lines), 'n/a'),
                    copyright=first(copyrights, 'n/a'),
                    # no title is extracted from a license file itself
                    title=name,
                    email=first(find_emails(lines), ''),
                    author='',
                    licfile=name,
                    )

    def scan_source_file(folder, name, lines):
        # Describe a source/JSON file, or return None without license data.
        title = author = email = ''
        if name.endswith('.json'):
            try:
                descr = json.loads(''.join(lines))
            except ValueError:
                return None
            if not isinstance(descr, dict):
                return None
            lic = license_name(descr.get('license'))
            title = descr.get('title') or descr.get('name') or ''
            author = descr.get('author') or ''
            if isinstance(author, dict):
                email = author.get('email') or ''
                author = author.get('name') or ''
        else:
            # any other file: look for a license id anywhere in the text
            found = LICENSE_ID_EXACT.search(' '.join(lines))
            lic = found.group(1) if found else ''
        if not lic:
            return None
        if not title:
            # The file name is escaped so that '.' etc. match literally.
            title_re = re.compile(TITLE.format(file=re.escape(name)),
                                  re.IGNORECASE)
            titles = [line for line in lines if title_re.match(line)]
            title = first(titles, name).replace(' * ', '').strip() or name
        marks = (COPYRIGHT.match(line) for line in lines)
        copyrights = [hit.group(1).strip() for hit in marks if hit]
        return dict(dir=folder,
                    file=name,
                    lic=lic,
                    source=first(find_links(lines), 'n/a'),
                    copyright=first(copyrights, 'n/a'),
                    title=title,
                    email=email or first(find_emails(lines), ''),
                    author=author,
                    licfile='',
                    )

    packages = []
    # normalize license information
    for folder, _subdirs, names in os.walk(root):
        for name in names:
            is_license = bool(LICENSE_FILES.match(name))
            if not is_license and not SOURCE_FILES.match(name):
                continue
            try:
                lines = read_lines(os.path.join(folder, name))
            except (IOError, OSError):
                # e.g. broken symlink or missing permission
                continue
            if is_license:
                info = scan_license_file(folder, name, lines)
            else:
                info = scan_source_file(folder, name, lines)
            if info:
                packages.append(info)

    # write licence files
    with open('licenses.json', 'w', encoding='utf-8') as licf:
        licf.write(json.dumps(packages))

    template = ' '.join((
        'This product includes software from {title} as licensed',
        ' {author} {email} under the terms of the {license} license.',
        '\n{copyright}\n',
        '\n\n```\n{lictext}\n```\n\n',
        '\n\n---\n\n',
    ))
    with open('licenses.md', 'w', encoding='utf-8') as licf:
        for pkg in packages:
            lictext = ''
            licurl = ''
            if pkg.get('licfile'):
                # use the text of the license file that was found
                licpath = os.path.join(pkg.get('dir'), pkg.get('licfile'))
                try:
                    lictext = ''.join(read_lines(licpath))
                except (IOError, OSError):
                    lictext = ''
            else:
                try:
                    # optional dependency, requires
                    # git+https://github.com/miraculixx/license.git
                    import license as liclookup
                    known = liclookup.find_by_key('rpm', pkg.get('lic'))[0]
                except Exception:
                    # Best effort: without the package, or for an unknown
                    # id, the notice is written without license text.
                    known = None
                if known is not None:
                    lictext = known.render(name=pkg.get('title'),
                                           email=pkg.get('email', ''))
                    licurl = known.url
            fields = dict(
                title=pkg.get('title', ''),
                author=pkg.get('author', ''),
                email=pkg.get('email', ''),
                copyright=pkg.get('copyright', ''),
                # an empty id reads as "the following license"
                license=pkg.get('lic') or 'following',
                lictext=lictext,
                licurl=licurl,
            )
            # Every value is flattened to a single line of text.
            fields = {key: str(value).replace('\n', ' ') if value else ''
                      for key, value in fields.items()}
            licf.write(template.format(**fields))
    return packages
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment