Skip to content

Instantly share code, notes, and snippets.

@philpennock
Created March 24, 2023 21:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philpennock/4dbeb8e053c77e6e3a6507975c0ff4ff to your computer and use it in GitHub Desktop.
Save philpennock/4dbeb8e053c77e6e3a6507975c0ff4ff to your computer and use it in GitHub Desktop.
Find VCS repos under the cwd and report their paths
#!/usr/bin/env python3
"""
find_to_repo: find all dirs under the reference point which are repos
Stop at repos, don't descend into the repos.
Handling submodules etc is explicitly out of scope.
Use --help to see help.
Assumes under current directory if no directories given.
"""
from __future__ import print_function
__author__ = 'phil@pennock-tech.com (Phil Pennock)'
import argparse
import os
import stat
import sys
# Internal logic, deliberately not configurable: a directory holding all of
# these sub-directories is treated as a bare git repo.
GIT_BARE_NEEDALL_DIRS = frozenset(('info', 'objects', 'refs'))
# Flag-file names; these are used by some update tooling.
SKIP_ALL_FILENAME = '.skip-updates'  # "prune here"
SKIP_CHILDREN_FILENAME = '.skip-children'  # selectively skip children
REPO_RECURSE_FLAGFILES = frozenset((SKIP_ALL_FILENAME, '.update-children'))
class Error(Exception):
    """Root of the exception hierarchy used by find_to_repo."""
class Exit(Error):
    """An error that should end the program cleanly, without a stack trace."""
class Walker(object):
    """Walk directory trees and print the path of every repository found.

    A location is reported as a repository when it is one of:

    1. a file whose extension is in ``bundle_exts``;
    2. a directory containing a meta directory named in ``repo_dirs``
       (a checkout, e.g. one with a ``.git/`` sub-directory);
    3. a directory whose name has an extension in ``dir_exts``
       (bare repos named like ``foo.git``);
    4. a directory directly containing every name in
       ``GIT_BARE_NEEDALL_DIRS`` (bare git repos not named that way).

    The walk never descends into a checkout or a bare repository.
    """

    def __init__(self, options):
        """Copy the settings the walk needs out of the parsed options.

        Args:
            options: object exposing ``repo_dirs``, ``bundle_exts``,
                ``dir_exts``, ``only_repo_types``, ``exclude_paths``,
                ``meta_file``, ``exists_file``, ``verbose``,
                ``warn_no_flagfiles``, ``obey_skips`` and ``symlinks_fd``.
        """
        self.repo_dirs = set(options.repo_dirs)
        self.bundle_exts = set(options.bundle_exts)
        self.dir_exts = set(options.dir_exts)
        self.only_show = set(options.only_repo_types)
        self.use_only_show = bool(self.only_show)
        self.exclude_paths = set(options.exclude_paths)
        self.required_meta_file = options.meta_file
        self.required_exists_file = options.exists_file
        self.verbose = options.verbose
        self.symlinks_file = None
        self.warn_no_flagfiles = options.warn_no_flagfiles
        self.obey_skips = options.obey_skips
        if options.symlinks_fd is not None and options.symlinks_fd >= 0:
            # Line-buffered so each symlink record is written out promptly.
            self.symlinks_file = os.fdopen(options.symlinks_fd, mode='wt', buffering=1, encoding='UTF-8')

    def _wanted(self, repo_type):
        """Return True unless an only-show filter is active and excludes repo_type."""
        return not self.use_only_show or repo_type in self.only_show

    def _emit(self, label, path):
        """Print one result: ``label<TAB>path`` when verbose, else just ``path``."""
        if self.verbose:
            print('{0}\t{1}'.format(label, path))
        else:
            print(path)

    def _report_checkout(self, root, dirs_set, meta_dirs):
        """Print checkout ``root`` if it satisfies the meta-file/exists-file requirements."""
        if self.required_meta_file:
            # Look inside a directory that actually is a repo meta-dir.  Using
            # the alphabetically first of *all* sub-directories would wrongly
            # skip a checkout which has e.g. ".cache/" next to ".git/".
            if self.use_only_show:
                candidates = dirs_set.intersection(self.only_show)
            else:
                candidates = meta_dirs
            meta_dir = sorted(candidates)[0]
            if not os.path.exists(os.path.join(root, meta_dir, self.required_meta_file)):
                return
        printable = root
        if self.required_exists_file:
            # With an exists-file requirement, the file's path is what gets printed.
            printable = os.path.join(root, self.required_exists_file)
            if not os.path.exists(printable):
                return
        # Sorted so that the verbose label is stable from run to run.
        self._emit(','.join(sorted(meta_dirs)), printable)

    def _report_bare(self, repo_type, path):
        """Print bare repo ``path`` if it passes the type filter and file requirements."""
        if not self._wanted(repo_type):
            return
        if self.required_meta_file and not os.path.exists(os.path.join(path, self.required_meta_file)):
            return
        if self.required_exists_file:
            # A working-tree file can't exist inside a bare repo.
            return
        self._emit(repo_type, path)

    def _skipped_children(self, root):
        """Return the child names listed in ``root``'s skip-children file.

        Blank lines and lines starting with ``#`` are ignored.
        """
        with open(os.path.join(root, SKIP_CHILDREN_FILENAME)) as handle:
            entries = [line.rstrip() for line in handle]
        return set(entry for entry in entries if entry and not entry.startswith('#'))

    def find_under(self, top):
        """Walk ``top`` top-down, printing each repository found to stdout.

        Optional warnings go to stderr, and symlinked directories seen outside
        repositories are written to the symlinks file when one was configured.

        Args:
            top: directory to start walking from.
        """
        for root, dirs, files in os.walk(top, topdown=True):
            # Exact string match against the path as os.walk spells it.
            if root in self.exclude_paths:
                del dirs[:]
                continue

            is_repo_thisdir = False

            # 1. Bundle files.  These don't stop the descent into sibling dirs.
            for bundle in files:
                ext = os.path.splitext(bundle)[1]
                if ext not in self.bundle_exts:
                    continue
                is_repo_thisdir = True
                if not self._wanted(ext):
                    continue
                if self.required_meta_file or self.required_exists_file:
                    # File, no --meta-file / --exists-file support.
                    continue
                self._emit(ext, os.path.join(root, bundle))

            # Snapshot taken before any pruning below.
            dirs_set = set(dirs)

            # 2. A checkout dir which contains a meta dir, eg a .git/ sub-dir.
            meta_dirs = self.repo_dirs.intersection(dirs_set)
            if meta_dirs:
                is_repo_thisdir = True
                if not (self.use_only_show and self.only_show.isdisjoint(dirs_set)):
                    self._report_checkout(root, dirs_set, meta_dirs)
                del dirs[:]

            # 3. Child dirs which are a "git bare repo" or moral equivalent,
            #    hinted at by the filename extension.
            byname = set(d for d in dirs if os.path.splitext(d)[1] in self.dir_exts)
            if byname:
                for name in sorted(byname):
                    self._report_bare(os.path.splitext(name)[1], os.path.join(root, name))
                dirs[:] = [d for d in dirs if d not in byname]

            # 4. A git bare repo not named to have the .git extension.
            # NOTE(review): dirs_set predates the pruning above, so a checkout
            # which also has info/objects/refs children is reported by both
            # branch 2 and this one.
            if GIT_BARE_NEEDALL_DIRS.issubset(dirs_set):
                is_repo_thisdir = True
                self._report_bare('.git', root)
                del dirs[:]

            if self.warn_no_flagfiles and not is_repo_thisdir:
                if not REPO_RECURSE_FLAGFILES.intersection(files):
                    print('# WARNING: not a repo, no flag-files: {}'.format(root), file=sys.stderr)

            # Ideally the warning above would be suppressed in sub-dirs under a
            # SKIP_CHILDREN_FILENAME, via context passed only to child dirs; the
            # walk API offers no such context, hence --obey-skips: a crude bodge.
            if self.obey_skips and SKIP_ALL_FILENAME in files:
                del dirs[:]
            if self.obey_skips and SKIP_CHILDREN_FILENAME in files:
                skip = self._skipped_children(root)
                dirs[:] = [d for d in dirs if d not in skip]

            if self.symlinks_file is not None and not is_repo_thisdir:
                for name in dirs:
                    path = os.path.join(root, name)
                    if os.path.islink(path):
                        print('{0} -> {1}'.format(path, os.readlink(path)), file=self.symlinks_file)
def _main(args, argv0):
parser = argparse.ArgumentParser()
repo_stops = ['.git', '.hg', 'CVS', '.bzr', '.svn', '.sl']
bundle_exts = ['.fossil']
dir_exts = ['.git']
known_types = frozenset(set(repo_stops) | set(bundle_exts) | set(dir_exts))
parser.add_argument('-x', '--exclude',
action='append', dest='exclude_paths', default=[],
help='skip anything under one of these prefices')
parser.add_argument('-o', '--only-repo-type',
action='append', dest='only_repo_types', metavar='TYPE+',
choices=known_types, default=[],
help='only print for these repo dirs (available: %(choices)s) (default: %(default)s)')
parser.add_argument('-r', '--repo-dir',
action='append', dest='repo_dirs', default=repo_stops,
help='Add to list of repo dirs (default %(default)s)')
parser.add_argument('-f', '--bundle-ext',
action='append', dest='bundle_exts', default=bundle_exts,
help='Add to list of repo file extensions (default %(default)s)')
parser.add_argument('-d', '--dir-ext',
action='append', dest='dir_exts', default=dir_exts,
help='Add to list of repo dirname extensions (default %(default)s)')
parser.add_argument('-v', '--verbose',
action='store_true', dest='verbose', default=False,
help='Be more verbose')
parser.add_argument('--symlinks-fd',
type=int, default=None, metavar='FD',
help='FD to write encountered symlinks to')
parser.add_argument('--warn-no-flagfiles',
action='store_true', default=False,
help='Outside repos, warn if no control flag-files are seen (probably want --obey-skips too)')
parser.add_argument('--obey-skips',
action='store_true', default=False,
help='Obey .skip-children files')
parser.add_argument('-F', '--meta-file',
type=str, default=None, metavar='FN',
help='filename relative to repo meta-dir to require to exist, to print')
parser.add_argument('-e', '--exists-file',
type=str, default=None, metavar='FN',
help='filename relative to repo working tree to require to exist, to print, including file')
parser.add_argument('top_dirs',
type=str, nargs='*', metavar='DIR',
help='top level directories')
options = parser.parse_args(args=args)
if not options.top_dirs:
options.top_dirs = ['.']
unknown = set(options.only_repo_types) - known_types
if unknown:
raise Exit('unknown repo types to limit to: {}'.format(' '.join(sorted(unknown))))
walker = Walker(options)
for top in options.top_dirs:
walker.find_under(top)
return 0
if __name__ == '__main__':
    # Program name for messages: whatever follows the last '/' in argv[0].
    argv0 = sys.argv[0].rpartition('/')[2]
    try:
        rv = _main(sys.argv[1:], argv0=argv0)
    except Exit as e:
        # Clean exit: one "prog: message" line per exception argument, no traceback.
        for arg in e.args:
            print('{}: {}'.format(argv0, arg), file=sys.stderr)
        rv = 1
    sys.exit(rv)
# vim: set ft=python sw=4 expandtab :
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment