Skip to content

Instantly share code, notes, and snippets.

@tsangwpx
Last active April 10, 2021 13:55
Show Gist options
  • Save tsangwpx/5940e0f95f135628335795a44078f688 to your computer and use it in GitHub Desktop.
Save tsangwpx/5940e0f95f135628335795a44078f688 to your computer and use it in GitHub Desktop.
remove_unused_virtualenvs.py
"""
Script to find dangling virtualenvs and to probe for potentially unused ones.
Dangling virtualenvs (those whose recorded project path no longer exists) are
removed after confirmation.
Potentially unused virtualenvs are shown.
"""
import logging
import os
import shutil
import stat
from argparse import ArgumentParser
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Tuple, Optional
def file_usage(path: Path) -> int:
    """Return the total size in bytes of the non-directory entries under *path*.

    The tree is traversed with ``os.walk`` (which does not descend into
    symlinked directories by default). Each entry is measured with
    ``os.lstat``, so a symbolic link contributes its own size rather than the
    size of its target, and a broken symlink no longer aborts the scan.

    :param path: root directory to measure
    :return: sum of ``st_size`` over every entry ``os.walk`` reports as a file
    """
    total = 0
    for parent, _dirnames, filenames in os.walk(path):
        for name in filenames:
            try:
                total += os.lstat(os.path.join(parent, name)).st_size
            except OSError:
                # The entry disappeared or cannot be stat'ed; a size report
                # should not fail because of a single unreadable file.
                continue
    return total
@dataclass
class VirtualEnvironment:
    """A virtualenv directory together with the project it is linked to.

    Two instances compare equal (and hash equally) when their ``path`` is the
    same, regardless of the other fields.
    """

    # Display name of the virtualenv.
    name: str
    # Resolved directory of the virtualenv.
    path: Path
    # Project directory recorded in the ``.project`` file, or None if absent.
    project_path: Optional[Path]
    # Disk usage in bytes; None until it has been computed by the caller.
    size: Optional[int] = None

    def __eq__(self, other):
        if isinstance(other, VirtualEnvironment):
            return self.path == other.path
        return NotImplemented

    def __hash__(self):
        return hash(self.path)

    @classmethod
    def from_path(cls, name: str, path: Path) -> 'VirtualEnvironment':
        """Build an instance from an existing virtualenv directory.

        :param name: name to give the virtualenv
        :param path: directory of the virtualenv; it is resolved strictly
        :return: a new instance with ``size`` left unset
        :raises FileNotFoundError: if *path* does not exist or does not
            contain a ``pyvenv.cfg`` file
        """
        path = path.resolve(strict=True)
        pyvenv_path = path.joinpath('pyvenv.cfg')
        if not pyvenv_path.is_file():
            raise FileNotFoundError(pyvenv_path)

        project_path = None
        try:
            dot_project = path.joinpath('.project').read_text('utf-8')
        except FileNotFoundError:
            # No .project file: the virtualenv is not linked to a project.
            pass
        else:
            # The file may end with a newline (e.g. when written by a shell
            # ``echo``). Without stripping, the path would never exist and
            # the virtualenv would be wrongly treated as dangling.
            dot_project = dot_project.strip()
            if dot_project:
                project_path = Path(dot_project)

        return cls(
            name=name,
            path=path,
            project_path=project_path,
        )
def walk_stats(root: Path) -> Iterator[Tuple[Path, os.stat_result]]:
    """Lazily yield ``(path, stat_result)`` for *root* and everything below it.

    *root* itself and every real sub-directory are stat'ed with ``os.stat``
    and then descended into. Every other entry, including symlinks (even
    those pointing at directories), is yielded once with a stat that does not
    follow symlinks and is never descended into.

    :param root: file or directory to start from
    :return: an iterator of ``(Path, os.stat_result)`` pairs; nothing is
        read from disk until the iterator is consumed
    """

    def _walk_inner(path: str) -> Iterator[Tuple[Path, os.stat_result]]:
        path_stat = os.stat(path)
        yield Path(path), path_stat

        if not stat.S_ISDIR(path_stat.st_mode):
            return

        # The context manager closes the directory handle even when the
        # generator is abandoned early or a stat call below raises.
        with os.scandir(path) as entries:
            for entry in entries:  # type: os.DirEntry
                if entry.is_dir() and not entry.is_symlink():
                    yield from _walk_inner(entry.path)
                else:
                    yield Path(entry.path), entry.stat(follow_symlinks=False)

    return _walk_inner(os.fspath(root))
def find_venvs(venv_home: Path) -> list:
    """Return the virtualenvs found directly inside *venv_home*.

    Entries are visited in sorted order. An entry for which
    ``VirtualEnvironment.from_path`` raises ``FileNotFoundError`` (it has no
    ``pyvenv.cfg``, or it is a broken symlink) is not a virtualenv; it is
    skipped with a debug message instead of aborting the whole scan.

    :param venv_home: directory that holds the virtualenvs
    :return: list of ``VirtualEnvironment`` objects, ordered by entry path
    """
    virtualenvs = []
    for venv_path in sorted(venv_home.iterdir()):
        try:
            venv = VirtualEnvironment.from_path(venv_path.name, venv_path)
        except FileNotFoundError:
            logging.getLogger().debug('Skipping %s: not a virtualenv', venv_path)
            continue
        virtualenvs.append(venv)
    return virtualenvs
def main():
    """Report virtualenv disk usage and offer to delete dangling virtualenvs.

    The optional ``venv_home`` command-line argument selects the directory to
    scan; it defaults to ``~/.virtualenvs``. Every virtualenv found is listed
    with its size and whether it is related to a project. A virtualenv is
    "dangling" when its recorded project path no longer exists. Dangling
    virtualenvs are removed only after the user answers ``yes``.
    """
    logger = logging.getLogger()

    p = ArgumentParser()
    p.add_argument('venv_home', type=Path, nargs='?')
    ns = p.parse_args()

    # argparse has already converted the argument to a Path (type=Path).
    venv_home = Path('~/.virtualenvs') if ns.venv_home is None else ns.venv_home
    venv_home = venv_home.expanduser()
    logger.debug('venv_home=%r', venv_home)

    # is_dir() is False for a missing path, so one check covers both cases.
    if not venv_home.is_dir():
        logger.error('%s does not exist or it is not a directory', venv_home)
        return

    virtualenvs = sorted(find_venvs(venv_home), key=lambda s: s.path)

    def inject_disk_usage(s: VirtualEnvironment):
        s.size = file_usage(s.path)

    # The context manager shuts the pool down; list() drains the lazy map so
    # that any exception raised in a worker surfaces here.
    with ThreadPoolExecutor() as executor:
        list(executor.map(inject_disk_usage, virtualenvs))

    related_virtualenvs = {s for s in virtualenvs if s.project_path is not None}
    dangling_virtualenvs = {s for s in related_virtualenvs if not s.project_path.exists()}

    # because of stable sort, we move dangling venvs to the last
    virtualenvs.sort(key=lambda s: int(s in dangling_virtualenvs))

    mib = 1024 ** 2
    fmt = "{size:>8s} {related:>8s} {name:48s} -> {project_path:s}"
    print(fmt.format(
        size="Size",
        related="Related",
        name="Name",
        project_path="Project Path",
    ))

    for venv in virtualenvs:
        if venv in dangling_virtualenvs:
            related = 'dangling'
        elif venv in related_virtualenvs:
            related = 'yes'
        else:
            related = 'no'
        print(fmt.format(
            size=f"{venv.size / mib:.0f}m",
            related=related,
            name=venv.name,
            project_path=str(venv.project_path) if venv.project_path else '',
        ))

    print(f"Total space usage: {sum(s.size for s in virtualenvs) / mib:.0f}m")
    print(f"Dangling usage: {sum(s.size for s in dangling_virtualenvs) / mib:.0f}m")

    ans = 'no'
    if dangling_virtualenvs:
        try:
            ans = input('Remove dangling? (NO/yes) ')
        except EOFError:
            # Non-interactive stdin: fall back to the safe default answer.
            ans = 'no'

    if ans.strip().lower() != 'yes':
        print("Exit")
        return

    # Walk the sorted list rather than the set so the removal order is
    # deterministic and matches the table printed above.
    for venv in virtualenvs:
        if venv not in dangling_virtualenvs:
            continue
        print(f"Removing {str(venv.path):s}")
        shutil.rmtree(venv.path, ignore_errors=True)
        if venv.path.exists():
            # rmtree() swallowed an error; report it instead of staying silent.
            logger.warning('Could not completely remove %s', venv.path)
def _on_rmtree_error(func, path, exc_info):
logging.getLogger().exception('Error when rmtree()')
if __name__ == "__main__":
    # Run as a script: enable debug-level logging on the root logger, then
    # hand control to main().
    logging.basicConfig(level=logging.DEBUG)
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment