Skip to content

Instantly share code, notes, and snippets.

@miraculixx
Created June 29, 2017 13:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miraculixx/be45b7434fbfabef4188267e793eece0 to your computer and use it in GitHub Desktop.
Save miraculixx/be45b7434fbfabef4188267e793eece0 to your computer and use it in GitHub Desktop.
getlicense -- collect license information in your project
def getlicenses(dir=None):
    """
    Simple licence collector.

    Walks ``dir`` (default: the current directory) and records one entry for

    * every file whose name starts with ``LICENSE``,
    * every ``.json`` manifest that is a JSON object with a ``license`` key,
    * every other css/js/min source whose text mentions a known licence id.

    The entries are written to ``licenses.json`` and, rendered as text, to
    ``licenses.md``; both files are created in the current working directory.
    Files that cannot be read and manifests that cannot be parsed are skipped.

    :param dir: root directory to scan; a falsy value means ``'.'``
    :return: list of the collected entries, each a dict with the keys
        ``dir``, ``file``, ``lic``, ``source``, ``copyright``, ``title``,
        ``email``, ``author`` and ``licfile``
    """
    import json
    import os
    import re

    root = dir or '.'

    license_files = re.compile(r'LICENSE.*')
    source_files = re.compile(r'(.*css$)|(.*js$)|(.*min$)|(.*json$)')
    # Longer ids come first so "GNU GPL"/"LGPL" are not reported as "GPL".
    # The word boundary keeps "mit" in "permitted"/"limitation" from counting,
    # the look-ahead still accepts versioned spellings such as "GPLv3".
    license_ids = r'\b(GNU.GPL|AGPL|LGPL|GPL|MIT|BSD|APACHE)(?=v?\d|\b)'
    lic_in_line = re.compile(license_ids, re.IGNORECASE)
    lic_in_source = re.compile(license_ids)
    link_re = re.compile(r'''https?://[^\s"'<>)]+''')
    copyright_re = re.compile(
        r'(copyright.*|\(c\).[0-9]+.*|©.[0-9]+.*)', re.IGNORECASE)
    title_tpl = r'.*({file}|@package.*|\s\*\s.*)'
    email_re = re.compile(r"^\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$")

    def read_lines(path):
        """Return the lines of *path*; undecodable bytes are replaced."""
        with open(path, 'r', encoding='utf-8', errors='replace') as handle:
            return handle.readlines()

    def as_text(value):
        """Coerce a manifest/entry value to a string (None becomes '')."""
        if value is None:
            return ''
        return value if isinstance(value, str) else str(value)

    def first(values, default=''):
        """Return the first element of *values*, or *default* when empty."""
        return values[0] if values else default

    packages = []
    for folder, subdirs, files in os.walk(root):
        # sorting in place makes the traversal, and so the output, stable
        subdirs.sort()
        for name in sorted(files):
            is_license = license_files.match(name) is not None
            if not is_license and source_files.match(name) is None:
                continue
            try:
                lines = read_lines(os.path.join(folder, name))
            except (IOError, OSError):
                # e.g. broken symlink or missing permission: nothing to scan
                continue

            # search() finds the leftmost hit, so "Copyright (c) 2017 X" is
            # kept whole instead of being cut down to "(c) 2017 X"
            copyrights = [m.group(1).strip()
                          for m in map(copyright_re.search, lines) if m]
            links = [m.group(0).rstrip('.,;')
                     for m in map(link_re.search, lines) if m]
            # group(0) is the address; group(1) is only an optional fragment
            emails = [m.group(0)
                      for m in (email_re.match(line.strip()) for line in lines)
                      if m]

            title = name
            author = ''
            email = first(emails)
            licfile = ''

            if is_license:
                # actual license file
                ids = [m.group(1) for m in map(lic_in_line.search, lines) if m]
                lic = first(ids)
                licfile = name
            elif name.endswith('.json'):
                # manifest: see if we have some sensible content
                try:
                    descr = json.loads(''.join(lines))
                except ValueError:
                    continue
                if not isinstance(descr, dict):
                    continue
                lic = descr.get('license')
                if isinstance(lic, dict):
                    # legacy form: {"type": "MIT", "url": "..."}
                    lic = lic.get('type')
                lic = as_text(lic)
                if not lic:
                    continue
                title = as_text(descr.get('title') or descr.get('name')) or name
                author = descr.get('author', '')
                if isinstance(author, dict):
                    email = as_text(author.get('email')) or email
                    author = author.get('name', '')
                author = as_text(author)
            else:
                # any other source file: only listed when it names a licence
                fullsrc = ' '.join(lines).replace('\n', ' ')
                hit = lic_in_source.search(fullsrc)
                if hit is None:
                    continue
                lic = hit.group(1)
                # escape the name: it is data, not part of the pattern
                title_re = re.compile(
                    title_tpl.format(file=re.escape(name)), re.IGNORECASE)
                titles = [line for line in lines if title_re.match(line)]
                if titles:
                    title = titles[0].replace(' * ', '').strip() or name

            packages.append(dict(dir=folder,
                                 file=name,
                                 lic=lic,
                                 source=first(links, 'n/a'),
                                 copyright=first(copyrights, 'n/a'),
                                 title=title,
                                 email=email,
                                 author=author,
                                 licfile=licfile,
                                 ))

    # write licence files
    with open('licenses.json', 'w') as licf:
        licf.write(json.dumps(packages))

    template = ' '.join((
        'This product includes software from {title} as licensed',
        ' {author} {email} under the terms of the {license} license.',
        '\n{copyright}\n',
        '\n\n```\n{lictext}\n```\n\n',
        '\n\n---\n\n',
    ))
    with open('licenses.md', 'w', encoding='utf-8') as licf:
        for pkg in packages:
            lictext = ''
            licurl = ''
            if pkg.get('licfile'):
                licpath = os.path.join(pkg.get('dir'), pkg.get('licfile'))
                try:
                    lictext = ''.join(read_lines(licpath)).strip('\n')
                except (IOError, OSError):
                    lictext = ''
            else:
                # Best effort only: the lookup needs the optional package
                # git+https://github.com/miraculixx/license.git and may fail
                # for unknown ids, so any error just leaves the text empty.
                try:
                    import license as liclookup
                    entry = liclookup.find_by_key('rpm', pkg.get('lic'))[0]
                    lictext = as_text(entry.render(name=pkg.get('title'),
                                                   email=pkg.get('email', '')))
                    licurl = as_text(entry.url)
                except Exception:
                    lictext = ''
                    licurl = ''
            opts = dict(
                title=pkg.get('title', ''),
                author=pkg.get('author', ''),
                email=pkg.get('email', ''),
                copyright=pkg.get('copyright', ''),
                license=pkg.get('lic') or 'following',
                licurl=licurl,
            )
            # one-line fields must not break the sentence they are put into
            for key, value in opts.items():
                opts[key] = as_text(value).replace('\n', ' ')
            # the licence text sits in a code fence, so its lines are kept
            opts['lictext'] = lictext
            licf.write(template.format(**opts))
    return packages
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment