Skip to content

Instantly share code, notes, and snippets.

@ckhung
Created July 27, 2019 06:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckhung/c6117be6207283e6bc44b9b6b8a84376 to your computer and use it in GitHub Desktop.
Save ckhung/c6117be6207283e6bc44b9b6b8a84376 to your computer and use it in GitHub Desktop.
randomly pick n samples from each word/directory
#!/usr/bin/python3
# randomly pick n samples from each word/directory
# of speech_commands_v0.02/ data set:
# https://download.tensorflow.org/data/speech_commands_v0.02.tgz
# usage: subsample.py -f -n 300 * > ~/list.txt
# tar czf ~/dsc.tgz $(cat ~/list.txt)
import argparse, os, random
from warnings import warn
def _build_parser():
    """Return the argument parser describing this script's command line."""
    parser = argparse.ArgumentParser(
        description='randomly pick n samples from each directory',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-n', '--npd', type=int, default=300,
                        help='pick this many files from each directory')
    parser.add_argument('-s', '--randomseed', type=str, default='',
                        help='random seed')
    parser.add_argument('-f', '--keepfiles', action='store_true',
                        help='also include files on the command line')
    parser.add_argument('dirs', nargs='*', help='dir1 dir2 ...')
    return parser


def _list_files(path):
    """Return the regular files directly inside *path*, sorted by path.

    Subdirectories and other non-file entries are skipped.  The result is
    sorted because os.listdir() returns entries in arbitrary order; without
    a stable order the same random seed could select different files.
    """
    # os.path.join avoids the doubled separator that plain concatenation
    # produces when *path* already ends with a slash.
    candidates = (os.path.join(path, name) for name in os.listdir(path))
    return sorted(fp for fp in candidates if os.path.isfile(fp))


def _pick(files, count, rng):
    """Return at most *count* entries of *files*, drawn at random via *rng*.

    When *files* holds no more than *count* entries, all of them are
    returned unchanged; a *count* of zero selects nothing.
    """
    if count <= 0:
        return []
    if len(files) > count:
        return rng.sample(files, count)
    return files


def main(argv=None):
    """Print a random subset of the files found in each given directory.

    argv: list of command-line arguments; None makes argparse read
        sys.argv[1:].

    Each positional argument that is a directory contributes up to --npd
    of its regular files, one path per output line.  Arguments that are
    not directories are echoed as-is when --keepfiles is given and
    silently skipped otherwise.  A negative --npd is rejected with a usage
    error (argparse exits with status 2).
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    if args.npd < 0:
        parser.error('-n/--npd must not be negative')

    # An empty seed string means "not seeded": fall back to OS entropy.
    rng = random.Random(args.randomseed) if args.randomseed else random.Random()

    for path in args.dirs:
        if not os.path.isdir(path):
            if args.keepfiles:
                print(path)
            continue
        for fp in _pick(_list_files(path), args.npd, rng):
            print(fp)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment