Skip to content

Instantly share code, notes, and snippets.

@neftlon
Last active November 25, 2022 08:25
Show Gist options
  • Save neftlon/21339d2813836c4f9891b7b9e29d28af to your computer and use it in GitHub Desktop.
Save neftlon/21339d2813836c4f9891b7b9e29d28af to your computer and use it in GitHub Desktop.
Pick k random files from a directory whose sizes lie within a given range
#!/usr/bin/env python3
"""Pick up to K random files from a directory whose sizes lie within a given range."""
import os
import random
import sys

K = 100
# TODO: read these in from command line
MINSIZE = 1e3  # 1 kB
MAXSIZE = 1e7  # 10 MB


def find_ok_sized(dirname: str, minsize: float = MINSIZE, maxsize: float = MAXSIZE) -> list:
    """Return paths of regular files directly inside *dirname* within the size range.

    A file qualifies when ``minsize < size < maxsize`` (both bounds exclusive).
    Sub-directories, symlinks and other non-regular entries are skipped: their
    reported size says nothing about file content, yet a directory's size
    (commonly 4096 bytes) would otherwise pass the filter.

    Raises:
        OSError: if *dirname* cannot be listed.
    """
    with os.scandir(dirname) as entries:
        return [
            # entry.path is os.path.join(dirname, entry.name), so the output
            # format matches what joining the names by hand would produce.
            entry.path
            for entry in entries
            if entry.is_file(follow_symlinks=False)
            and minsize < entry.stat(follow_symlinks=False).st_size < maxsize
        ]


def pick_random_files(
    dirname: str, k: int = K, minsize: float = MINSIZE, maxsize: float = MAXSIZE
) -> list:
    """Return up to *k* distinct, randomly chosen files from *dirname*.

    Sampling is done without replacement, so no path appears twice. When fewer
    than *k* files qualify, all of them are returned (in random order); when
    none qualify, the result is an empty list.
    """
    candidates = find_ok_sized(dirname, minsize, maxsize)
    # random.sample raises ValueError if asked for more items than exist,
    # so cap the request at the number of candidates.
    return random.sample(candidates, min(k, len(candidates)))


def main(argv: list) -> int:
    """Command-line entry point; returns the process exit status."""
    if len(argv) != 2:
        print(f"usage: {argv[0]} dirname", file=sys.stderr)
        return -1
    # output paths line-by-line
    for path in pick_random_files(argv[1]):
        print(path)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment