Skip to content

Instantly share code, notes, and snippets.

@ngbrown
Created May 8, 2021 19:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ngbrown/577308a0bfbbd35a3c565f206011f8dd to your computer and use it in GitHub Desktop.
Save ngbrown/577308a0bfbbd35a3c565f206011f8dd to your computer and use it in GitHub Desktop.
Filter for git-filter-repo to convert all text files to LF line endings, for example after a Mercurial conversion
#!/usr/bin/env python3
"""
This is a simple program that converts Windows (CRLF) line endings to LF in
all non-binary files in history. It also rewrites commit hashes in commit
messages to refer to the new commits with the rewritten files.
See https://github.com/newren/git-filter-repo/issues/45
and https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse
import os
import subprocess
import tempfile
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# NOTE(review): this epilog is printed verbatim at the end of --help and still
# holds placeholder text; replace it with real usage examples when available.
example_text = '''CALLBACK
EXAMPLES
'''

# Command-line interface. The only option is handed on to git-filter-repo's
# filtering options further down in this file (args.replace_refs).
parser = argparse.ArgumentParser(
    description='Convert CRLF line endings to LF in all non-binary files in history',
    epilog=example_text,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--replace-refs', default=None,
                    choices=['delete-no-add', 'delete-and-add',
                             'update-no-add', 'update-or-add',
                             'update-and-add'],
                    help='passed through unchanged as the replace_refs '
                         'filtering option of git-filter-repo')
lint_args = parser.parse_args()
# Lower-case filename suffixes (leading dot included) that mark a file as
# binary. Stored as a frozenset so the constant is immutable and membership
# tests are O(1).
binary_extensions = frozenset([
    b".exe", b".pdf", b".bmp", b".cur", b".dll", b".doc", b".docx", b".ico", b".jpg", b".ocx",
    b".xls", b".xlsx", b".png", b".gif", b".mp4", b".swf", b".o", b".od", b".gch", b".pch",
    b".a", b".gz", b".zip", b".tar", b".bin",
])


def is_relevant(filename: bytes) -> bool:
    """Return True unless *filename* ends in a known binary extension.

    Only the final path component (after the last ``/``) is inspected, and
    the comparison is case-insensitive. A name without any dot never matches,
    because every entry in ``binary_extensions`` starts with a dot.

    Args:
        filename: Slash-separated path as bytes (a bytearray also works).

    Returns:
        False when the suffix is listed in ``binary_extensions``, else True.
    """
    just_filename = filename.rpartition(b"/")[2]
    _, dot, suffix = just_filename.rpartition(b".")
    # bytes() keeps the value hashable even if a bytearray was passed in.
    extension = bytes(dot + suffix).lower()
    return extension not in binary_extensions
# Never read by the code in this file; kept so the module still exposes it.
tmpdir = None
# Maps an already-examined blob id to the id that must be used in its place
# (the id itself when the blob was left untouched).
blobs_handled = {}
# Long-running `git cat-file --batch` child; assigned before filtering starts.
cat_file_process = None


def lint_with_real_filenames(commit, metadata):
    """Commit callback: point file changes at LF-only copies of their blobs.

    For every non-deleted change whose filename passes ``is_relevant``, the
    blob is fetched through ``cat_file_process``. If it is a blob, has no NUL
    byte in its first 8192 bytes and contains ``\\r\\n``, a new blob with every
    ``\\r\\n`` replaced by ``\\n`` is inserted into the filter stream and the
    change is redirected to it. Results are cached in ``blobs_handled``.

    Args:
        commit: Commit object supplied by the filter; its ``file_changes``
            entries are updated in place.
        metadata: Supplied by the filter; not used here.

    Raises:
        ValueError: If the cat-file process returns no header line at all.
    """
    for change in commit.file_changes:
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue
        if change.type == b'D' or not is_relevant(change.filename):
            continue

        # Ask the batch process for the old object.
        cat_file_process.stdin.write(change.blob_id + b'\n')
        cat_file_process.stdin.flush()
        header = cat_file_process.stdout.readline().split()
        if len(header) != 3:
            if not header:
                # Empty read: the helper is gone, nothing sensible can follow.
                raise ValueError("git cat-file --batch returned no header for %r"
                                 % (change.blob_id,))
            # Two-field reply such as b"<id> missing": there is no content to
            # read or convert, so remember the id and leave the change alone.
            blobs_handled[change.blob_id] = change.blob_id
            continue

        objtype, objsize = header[1], header[2]
        # The batch output terminates the contents with one extra newline;
        # it must be consumed even when the object ends up being skipped.
        contents_plus_newline = cat_file_process.stdout.read(int(objsize) + 1)
        blob_data = contents_plus_newline[:-1]

        # Skip anything that is not a blob, looks binary, or has no Windows
        # newlines; record it so it is not fetched again.
        if (objtype != b'blob'
                or b"\0" in blob_data[:8192]
                or b"\r\n" not in blob_data):
            blobs_handled[change.blob_id] = change.blob_id
            continue

        blob = fr.Blob(blob_data.replace(b"\r\n", b"\n"))
        # Insert the new file into the filter's stream
        filter.insert(blob)
        # Record our handling of the blob and use it for this change
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
# Driver: build the filtering options, start the blob reader, run the filter.
args = fr.FilteringOptions.default_options()
args.force = True
args.replace_refs = lint_args.replace_refs

# No temporary directory is created here: nothing in this script ever read
# `tmpdir`, so the former tempfile.mkdtemp() call only left an empty
# directory behind on every run.

# One long-lived `git cat-file --batch` child serves all blob lookups made by
# the commit callback.
cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
try:
    # The callback reads the module-level names `filter` and
    # `cat_file_process`, so both must keep these exact names.
    filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
    filter.run()
finally:
    # Always shut the helper down, even when filtering raised.
    cat_file_process.stdin.close()
    cat_file_process.wait()
#!/usr/bin/env python3
"""
This is a simple program that converts Windows (CRLF) line endings to LF in
all non-binary files in history. It also rewrites commit hashes in commit
messages to refer to the new commits with the rewritten files.
See https://github.com/newren/git-filter-repo/issues/45
and https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
"""
"""
Please see the
***** API BACKWARD COMPATIBILITY CAVEAT *****
near the top of git-filter-repo.
"""
import argparse
import os
import subprocess
import tempfile
try:
import git_filter_repo as fr
except ImportError:
raise SystemExit("Error: Couldn't find git_filter_repo.py. Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?")
# NOTE(review): this epilog is printed verbatim at the end of --help and still
# holds placeholder text; replace it with real usage examples when available.
example_text = '''CALLBACK
EXAMPLES
'''

# Command-line interface. The only option is handed on to git-filter-repo's
# filtering options further down in this file (args.replace_refs).
parser = argparse.ArgumentParser(
    description='Convert CRLF line endings to LF in all non-binary files in history',
    epilog=example_text,
    formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--replace-refs', default=None,
                    choices=['delete-no-add', 'delete-and-add',
                             'update-no-add', 'update-or-add',
                             'update-and-add'],
                    help='passed through unchanged as the replace_refs '
                         'filtering option of git-filter-repo')
lint_args = parser.parse_args()
# Lower-case filename suffixes (leading dot included) that mark a file as
# binary. Stored as a frozenset so the constant is immutable and membership
# tests are O(1).
binary_extensions = frozenset([
    b".exe", b".pdf", b".bmp", b".cur", b".dll", b".doc", b".docx", b".ico", b".jpg", b".ocx",
    b".xls", b".xlsx", b".png", b".gif", b".mp4", b".swf", b".o", b".od", b".gch", b".pch",
    b".a", b".gz", b".zip", b".tar", b".bin",
])


def is_relevant(filename: bytes) -> bool:
    """Return True unless *filename* ends in a known binary extension.

    Only the final path component (after the last ``/``) is inspected, and
    the comparison is case-insensitive. A name without any dot never matches,
    because every entry in ``binary_extensions`` starts with a dot.

    Args:
        filename: Slash-separated path as bytes (a bytearray also works).

    Returns:
        False when the suffix is listed in ``binary_extensions``, else True.
    """
    just_filename = filename.rpartition(b"/")[2]
    _, dot, suffix = just_filename.rpartition(b".")
    # bytes() keeps the value hashable even if a bytearray was passed in.
    extension = bytes(dot + suffix).lower()
    return extension not in binary_extensions
# Never read by the code in this file; kept so the module still exposes it.
tmpdir = None
# Maps an already-examined blob id to the id that must be used in its place
# (the id itself when the blob was left untouched).
blobs_handled = {}
# Long-running `git cat-file --batch` child; assigned before filtering starts.
cat_file_process = None


def lint_with_real_filenames(commit, metadata):
    """Commit callback: point file changes at LF-only copies of their blobs.

    For every non-deleted change whose filename passes ``is_relevant``, the
    blob is fetched through ``cat_file_process``. If it is a blob, has no NUL
    byte in its first 8192 bytes and contains ``\\r\\n``, a new blob with every
    ``\\r\\n`` replaced by ``\\n`` is inserted into the filter stream and the
    change is redirected to it. Results are cached in ``blobs_handled``.

    Args:
        commit: Commit object supplied by the filter; its ``file_changes``
            entries are updated in place.
        metadata: Supplied by the filter; not used here.

    Raises:
        ValueError: If the cat-file process returns no header line at all.
    """
    for change in commit.file_changes:
        if change.blob_id in blobs_handled:
            change.blob_id = blobs_handled[change.blob_id]
            continue
        if change.type == b'D' or not is_relevant(change.filename):
            continue

        # Ask the batch process for the old object.
        cat_file_process.stdin.write(change.blob_id + b'\n')
        cat_file_process.stdin.flush()
        header = cat_file_process.stdout.readline().split()
        if len(header) != 3:
            if not header:
                # Empty read: the helper is gone, nothing sensible can follow.
                raise ValueError("git cat-file --batch returned no header for %r"
                                 % (change.blob_id,))
            # Two-field reply such as b"<id> missing": there is no content to
            # read or convert, so remember the id and leave the change alone.
            blobs_handled[change.blob_id] = change.blob_id
            continue

        objtype, objsize = header[1], header[2]
        # The batch output terminates the contents with one extra newline;
        # it must be consumed even when the object ends up being skipped.
        contents_plus_newline = cat_file_process.stdout.read(int(objsize) + 1)
        blob_data = contents_plus_newline[:-1]

        # Skip anything that is not a blob, looks binary, or has no Windows
        # newlines; record it so it is not fetched again.
        if (objtype != b'blob'
                or b"\0" in blob_data[:8192]
                or b"\r\n" not in blob_data):
            blobs_handled[change.blob_id] = change.blob_id
            continue

        blob = fr.Blob(blob_data.replace(b"\r\n", b"\n"))
        # Insert the new file into the filter's stream
        filter.insert(blob)
        # Record our handling of the blob and use it for this change
        blobs_handled[change.blob_id] = blob.id
        change.blob_id = blob.id
# Driver: build the filtering options, start the blob reader, run the filter.
args = fr.FilteringOptions.default_options()
args.force = True
args.replace_refs = lint_args.replace_refs

# No temporary directory is created here: nothing in this script ever read
# `tmpdir`, so the former tempfile.mkdtemp() call only left an empty
# directory behind on every run.

# One long-lived `git cat-file --batch` child serves all blob lookups made by
# the commit callback.
cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
try:
    # The callback reads the module-level names `filter` and
    # `cat_file_process`, so both must keep these exact names.
    filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)
    filter.run()
finally:
    # Always shut the helper down, even when filtering raised.
    cat_file_process.stdin.close()
    cat_file_process.wait()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment