Last active
November 25, 2022 08:25
-
-
Save neftlon/21339d2813836c4f9891b7b9e29d28af to your computer and use it in GitHub Desktop.
Pick k random files from a directory that lie within a given size range
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Pick k random files from a directory that lie within a given size range.

Usage: <script> dirname

Prints up to K distinct paths of regular files located directly inside
``dirname`` whose size in bytes is strictly between MINSIZE and MAXSIZE,
one path per line.
"""
import os
import random
import sys

# Maximum number of files to print.
K = 100
# TODO: read these in from command line
MINSIZE = 1e3  # 1kb
MAXSIZE = 1e7  # 10mb


def files_in_size_range(dirname, minsize=MINSIZE, maxsize=MAXSIZE):
    """Return sorted paths of regular files in *dirname* within the size range.

    Only direct children of *dirname* are considered. A path is kept when it
    is a regular file (directories and symlinks are skipped, since their
    reported size says nothing about file content) and
    ``minsize < size_in_bytes < maxsize`` (both bounds exclusive).

    Raises:
        OSError: if *dirname* cannot be listed or an entry cannot be stat'ed.
    """
    with os.scandir(dirname) as entries:
        matches = [
            entry.path  # equivalent to os.path.join(dirname, entry.name)
            for entry in entries
            if entry.is_file(follow_symlinks=False)
            and minsize < entry.stat(follow_symlinks=False).st_size < maxsize
        ]
    # Directory listing order is arbitrary; sort so results are reproducible
    # under a fixed random seed.
    matches.sort()
    return matches


def pick_files(candidates, k=K):
    """Return up to *k* distinct items chosen at random from *candidates*.

    Sampling is without replacement, so no path is returned twice. When
    fewer than *k* candidates exist, all of them are returned (in random
    order); an empty candidate list yields an empty list instead of an error.
    """
    return random.sample(candidates, min(k, len(candidates)))


def main(argv=None):
    """Run the command-line tool and return its exit status.

    *argv* defaults to ``sys.argv``; it must hold the program name followed
    by exactly one directory name. Returns -1 after printing a usage message
    to stderr when the argument count is wrong, otherwise 0.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        print(f"usage: {argv[0]} dirname", file=sys.stderr)
        return -1

    dirname = argv[1]
    # output paths line-by-line
    for item in pick_files(files_in_size_range(dirname)):
        print(item)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment