Skip to content

Instantly share code, notes, and snippets.

@felixfontein
Last active July 17, 2022 08:49
Show Gist options
  • Save felixfontein/3d9502e22d1461dc3baadddeeb9d9cd9 to your computer and use it in GitHub Desktop.
Save felixfontein/3d9502e22d1461dc3baadddeeb9d9cd9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import re
# Comment-less notice lines; handle_line_comments() prefixes each one with the
# file type's line-comment marker before inserting it.
DEFAULT_COPYRIGHT_NOTICE = [
    'Copyright (c) Ansible Project',
    'GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)',
    'SPDX-License-Identifier: GPL-3.0-or-later',
]

# Maps a file extension (including the leading dot) to a callable taking a path.
EXTENSION_HANDLERS = {}

# Matches "(see <path> or <url>)"; group 1 is the "(see " prefix and group 2 is
# the " or <url>)" suffix, so only the path in between gets replaced.
# Raw string: "\(" and "\)" are regex escapes, not Python string escapes.
LICENSE_PATH_MATCHER = re.compile(r'(\(see )[^ ]+( or https?://[^)]+\))')
def read_lines(path):
    """Return the UTF-8 decoded content of *path* as a list of lines.

    Line terminators are removed. When the content ends with a newline, an
    empty string is appended so that joining the result with newlines
    reproduces that final newline.
    """
    with open(path, 'rb') as stream:
        text = stream.read().decode('utf-8')
    trailing = [''] if text.endswith('\n') else []
    return text.splitlines() + trailing
def write_lines(path, lines):
    """Join *lines* with newlines and write the result to *path* as UTF-8.

    No newline is added after the last element; callers keep a trailing empty
    string in *lines* when the file should end with a newline.
    """
    text = '\n'.join(lines)
    with open(path, 'wb') as stream:
        stream.write(text.encode('utf-8'))
def identify_license(line):
    """Return the SPDX identifier for the license statement in *line*.

    The GPL is only recognised when the line also names version 3 or later
    ("v3.0+" or "version 3 or later"). Returns None when no known license
    phrase is found.
    """
    names_gpl = 'GNU General Public License' in line
    if names_gpl and ('v3.0+' in line or 'version 3 or later' in line):
        return 'GPL-3.0-or-later'

    # Checked in order; the first phrase found in the line wins.
    phrase_table = (
        (('Simplified BSD License',), 'BSD-2-Clause'),
        (('Apache License 2.0',), 'Apache-2.0'),
        (('PSF License', 'Python-2.0'), 'PSF-2.0'),
        (('MIT License',), 'MIT'),
    )
    for phrases, spdx_id in phrase_table:
        if any(phrase in line for phrase in phrases):
            return spdx_id
    return None
def adjust_license_path(line, license):
    """Point the "(see <path> or <url>)" reference in *line* at LICENSES/<license>.txt.

    Only the path between "(see " and " or <url>)" is replaced; lines without
    such a reference are returned unchanged.
    """
    def _rewrite(match):
        prefix, suffix = match.groups()
        return '%sLICENSES/%s.txt%s' % (prefix, license, suffix)

    return LICENSE_PATH_MATCHER.sub(_rewrite, line)
def handle_line_comments(
    path,
    line_comment_start,
    shebang_start=None,
    shebang=None,
    add_shebang=False,
):
    """Insert the default copyright notice into a file that uses line comments.

    The file is left untouched when any line starting with *line_comment_start*
    already contains an ``SPDX-License-Identifier: `` marker. Otherwise every
    entry of DEFAULT_COPYRIGHT_NOTICE is inserted as a comment line, followed
    by a blank line if the next line is not already empty, and the file is
    rewritten.

    Parameters:
        path: file to update in place.
        line_comment_start: comment marker put in front of every notice line.
        shebang_start: if given, a first line starting with this prefix is
            kept above the notice.
        shebang: if given, a line equal to this string at the insertion point
            is kept above the notice.
        add_shebang: if true and no leading line was skipped, *shebang* is
            inserted as the new first line (ignored when *shebang* is None).
    """
    lines = read_lines(path)

    # Nothing to do when a comment line already carries an SPDX identifier.
    if any(
        line.startswith(line_comment_start) and 'SPDX-License-Identifier: ' in line
        for line in lines
    ):
        return

    if not lines:
        # Empty file: a single empty "line" makes the rewritten file end with
        # a newline instead of stopping right after the last notice line.
        lines = ['']

    first_content_line = 0
    if shebang_start is not None and lines[first_content_line].startswith(shebang_start):
        first_content_line += 1
    # Bounds check: the shebang_start line may have been the only line.
    if (
        shebang is not None
        and first_content_line < len(lines)
        and lines[first_content_line] == shebang
    ):
        first_content_line += 1
    # Without a shebang value there is nothing sensible to insert.
    if add_shebang and shebang is not None and first_content_line == 0:
        lines.insert(0, shebang)
        first_content_line += 1

    notice = ['%s %s' % (line_comment_start, line) for line in DEFAULT_COPYRIGHT_NOTICE]
    lines[first_content_line:first_content_line] = notice
    first_content_line += len(notice)

    # Separate the notice from following content with one blank line.
    if first_content_line < len(lines) and lines[first_content_line] != '':
        lines.insert(first_content_line, '')
    write_lines(path, lines)
# Register one handler per supported extension. Python and shell files keep a
# leading "#!" line above the notice; YAML files keep (or get) a leading "---".
EXTENSION_HANDLERS.update({
    '.py': lambda path: handle_line_comments(path, '#', shebang_start='#!'),
    '.sh': lambda path: handle_line_comments(path, '#', shebang_start='#!'),
    '.yml': lambda path: handle_line_comments(path, '#', shebang='---', add_shebang=True),
    '.yaml': lambda path: handle_line_comments(path, '#', shebang='---', add_shebang=True),
    '.ps1': lambda path: handle_line_comments(path, '#'),
    '.pem': lambda path: handle_line_comments(path, '#'),
})
def process(path):
    """Run the matching handler on *path*, if there is one.

    Files named ``aliases`` are treated as '#'-commented files; all other
    files are dispatched on their extension via EXTENSION_HANDLERS. Files
    without a handler are ignored. Any exception raised by the handler is
    reported on stdout and not propagated.
    """
    name = os.path.basename(path)
    if name == 'aliases':
        def handler(file_path):
            return handle_line_comments(file_path, '#')
    else:
        handler = EXTENSION_HANDLERS.get(os.path.splitext(name)[1])

    if not handler:
        return
    try:
        handler(path)
    except Exception as exc:
        print('ERROR while processing %s: %s' % (path, exc))
# Script entry point: walk the current working directory and run process()
# on every file found.
current = os.getcwd()
for real_path, dirs, files in os.walk(current):
    # os.walk() is top-down by default, so trimming `dirs` in place stops it
    # from descending into the removed directories at all. The top-level .git
    # directory and every __pycache__ directory (including a top-level one)
    # are skipped; comparing directory names avoids hard-coding '/'.
    at_top_level = os.path.relpath(real_path, current) == os.curdir
    dirs[:] = [
        name for name in dirs
        if name != '__pycache__' and not (at_top_level and name == '.git')
    ]
    for file in files:
        process(os.path.join(real_path, file))
#!/usr/bin/env python3
import os
import re
# Maps a file extension (including the leading dot) to a callable taking a path.
EXTENSION_HANDLERS = {}

# Matches "(see <path> or <url>)"; group 1 is the "(see " prefix and group 2 is
# the " or <url>)" suffix, so only the path in between gets replaced.
# Raw string: "\(" and "\)" are regex escapes, not Python string escapes.
LICENSE_PATH_MATCHER = re.compile(r'(\(see )[^ ]+( or https?://[^)]+\))')
def read_lines(path):
    """Decode *path* as UTF-8 and split it into lines without terminators.

    An extra empty string is appended when the content ends with a newline,
    so that joining the list with newlines restores the final newline.
    """
    with open(path, 'rb') as handle:
        raw = handle.read()
    decoded = raw.decode('utf-8')
    pieces = decoded.splitlines()
    if decoded.endswith('\n'):
        pieces += ['']
    return pieces
def write_lines(path, lines):
    """Write *lines* to *path* as UTF-8, separated by single newlines.

    Nothing is appended after the last element, so a trailing empty string in
    *lines* is what produces a final newline in the file.
    """
    joined = '\n'.join(lines)
    with open(path, 'wb') as handle:
        handle.write(joined.encode('utf-8'))
def identify_license(line):
    """Return the SPDX identifier matching the license phrase in *line*.

    A GPL mention only counts when the line also says "v3.0+" or
    "version 3 or later". None is returned when nothing is recognised.
    """
    if 'GNU General Public License' in line:
        for version_hint in ('v3.0+', 'version 3 or later'):
            if version_hint in line:
                return 'GPL-3.0-or-later'

    # Ordered (phrase, identifier) pairs; the first phrase present wins.
    known_phrases = [
        ('Simplified BSD License', 'BSD-2-Clause'),
        ('Apache License 2.0', 'Apache-2.0'),
        ('PSF License', 'PSF-2.0'),
        ('Python-2.0', 'PSF-2.0'),
        ('MIT License', 'MIT'),
    ]
    matches = (spdx_id for phrase, spdx_id in known_phrases if phrase in line)
    return next(matches, None)
def adjust_license_path(line, license):
    """Rewrite the path in "(see <path> or <url>)" to LICENSES/<license>.txt.

    The surrounding "(see " and " or <url>)" text is preserved; a line with no
    such reference comes back unchanged.
    """
    def _replacement(match):
        head = match.group(1)
        tail = match.group(2)
        return '%sLICENSES/%s.txt%s' % (head, license, tail)

    return LICENSE_PATH_MATCHER.sub(_replacement, line)
def handle_line_comments(path, line_comment_start):
    """Normalise the license line of a line-commented file and add an SPDX tag.

    Only lines starting with *line_comment_start* are inspected. The single
    line for which identify_license() reports a license gets its
    "(see <path> or <url>)" reference rewritten by adjust_license_path(). If
    the file has no ``SPDX-License-Identifier: `` comment yet, one is inserted
    right after that line. The file is written back only when its content
    actually changes.

    Parameters:
        path: file to update in place.
        line_comment_start: comment marker of the file type, e.g. '#'.

    Raises:
        ValueError: if an existing SPDX identifier differs from the detected
            license, or if more than one license line is found. Nothing is
            written in either case.
    """
    spdx_marker = 'SPDX-License-Identifier: '
    lines = read_lines(path)

    # Existing SPDX identifier, if any; the last such comment line wins.
    # Stripped so trailing whitespace does not cause a false mismatch below.
    has_license = None
    for line in lines:
        if not line.startswith(line_comment_start):
            continue
        pos = line.find(spdx_marker)
        if pos >= 0:
            has_license = line[pos + len(spdx_marker):].strip()

    # Locate the license line first and edit afterwards, so the list is never
    # modified while it is being iterated.
    match_index = None
    match_text = None
    detected = None
    for index, line in enumerate(lines):
        if not line.startswith(line_comment_start):
            continue
        text = line[len(line_comment_start):]
        candidate = identify_license(text)
        if candidate is None:
            continue
        if has_license is not None and candidate != has_license:
            raise ValueError('SPDX license identifier (%s) does not match detected license (%s) from line: %s' % (has_license, candidate, text))
        if match_index is not None:
            raise ValueError('Found more than one license line. The second line: %s' % (text, ))
        match_index, match_text, detected = index, text, candidate

    if match_index is None:
        # No license line: leave the file alone rather than rewriting it
        # (a rewrite would silently normalise its line endings).
        return

    updated_line = '%s%s' % (line_comment_start, adjust_license_path(match_text, detected))
    changed = updated_line != lines[match_index]
    lines[match_index] = updated_line
    if not has_license:
        lines.insert(match_index + 1, '%s SPDX-License-Identifier: %s' % (line_comment_start, detected))
        changed = True

    if changed:
        write_lines(path, lines)
# Every supported file type uses '#' as its line-comment marker.
EXTENSION_HANDLERS.update({
    '.py': lambda path: handle_line_comments(path, '#'),
    '.yml': lambda path: handle_line_comments(path, '#'),
    '.yaml': lambda path: handle_line_comments(path, '#'),
    '.ps1': lambda path: handle_line_comments(path, '#'),
})
def process(path):
    """Dispatch *path* to the handler registered for its file extension.

    Files whose extension has no entry in EXTENSION_HANDLERS are ignored. Any
    exception raised by the handler is reported on stdout and not propagated.
    """
    _, suffix = os.path.splitext(path)
    handler = EXTENSION_HANDLERS.get(suffix)
    if not handler:
        return
    try:
        handler(path)
    except Exception as exc:
        print('ERROR while processing %s: %s' % (path, exc))
# Script entry point: walk the current working directory and run process()
# on every file found.
current = os.getcwd()
for real_path, dirs, files in os.walk(current):
    # os.walk() is top-down by default, so trimming `dirs` in place stops it
    # from descending into the removed directories at all. The top-level .git
    # directory and every __pycache__ directory (including a top-level one)
    # are skipped; comparing directory names avoids hard-coding '/'.
    at_top_level = os.path.relpath(real_path, current) == os.curdir
    dirs[:] = [
        name for name in dirs
        if name != '__pycache__' and not (at_top_level and name == '.git')
    ]
    for file in files:
        process(os.path.join(real_path, file))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment