Last active
May 30, 2022 22:51
dupe_files.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Take a directory and display the files in its tree that share a size,
grouped by size.

Useful for hunting down duplicate files: equal size is only a hint that two
files may be identical, not proof, because file contents are never compared.
""" | |
from argparse import ArgumentParser, ArgumentTypeError | |
from os import walk | |
from os.path import getsize | |
from pathlib import Path | |
from typing import DefaultDict, Generator, List | |
def main() -> None:
    """Parse the command line and report groups of same-sized files.

    Walks the directory given as ``starting_point``, buckets every file by
    its size in bytes, and prints each size shared by more than one file
    (smallest size first) followed by the tab-indented paths of those files.
    Sizes that belong to a single file are not printed.
    """
    # The first positional parameter of ArgumentParser is ``prog``; the
    # summary must be passed as ``description`` so the usage line keeps the
    # real program name and the summary appears in ``--help``.
    parser = ArgumentParser(
        description="Find files with the same sizes in a directory tree.",
    )
    parser.add_argument(
        "starting_point",
        type=directory,
        help="The directory to start looking in.",
    )
    args = parser.parse_args()

    # getsize() returns a whole number of bytes, so the keys are ints.
    file_sizes = DefaultDict[int, List[Path]](list)
    for path in iter_files(args.starting_point):
        file_sizes[getsize(path)].append(path)

    for size in sorted(file_sizes):
        paths = file_sizes[size]
        if len(paths) > 1:
            print(sizeof_fmt(size))
            for path in paths:
                print(f"\t{path}")
def iter_files(starting_point: str):
    """Yield a ``Path`` for every file found beneath *starting_point*.

    The tree is traversed with ``os.walk``; each yielded path is the walked
    directory joined with the file name.
    """
    for current_dir, _subdirs, names in walk(starting_point):
        parent = Path(current_dir)
        for name in names:
            candidate = parent / name
            # Dangling symlinks are listed by walk() but are not real files.
            if not candidate.is_file():
                continue
            yield candidate
def directory(path: str) -> Path:
    """Convert a command-line argument into a ``Path`` to an existing directory.

    Intended for use as an argparse ``type=`` callable.

    Raises:
        ArgumentTypeError: if *path* does not refer to a directory.
    """
    candidate = Path(path)
    if candidate.is_dir():
        return candidate
    raise ArgumentTypeError("not a directory")
def sizeof_fmt(num: float) -> str:
    """Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then formatted with one decimal place and the unit
    appended directly, e.g. ``"1.5KB"``.
    """
    # Adapted from http://stackoverflow.com/a/1094933/240515
    units = ("bytes", "KB", "MB", "GB", "TB")
    largest = len(units) - 1
    step = 0
    # Stop scaling once the value fits the current unit or units run out.
    while step < largest and not num < 1024.0:
        num /= 1024.0
        step += 1
    return f"{num:3.1f}{units[step]}"
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment