Skip to content

Instantly share code, notes, and snippets.

@Suor
Created Aug 19, 2019
Embed
What would you like to do?
Generate files
#!/usr/bin/env python3
import argparse
import os
import random
import re
import sys

from funcy import re_find
from tqdm import tqdm
# Upper bound, in bytes, for the in-memory buffer that main() builds by
# repeating the sample text and then writes to each file over and over.
CHUNK_SIZE = 512 * 1024

# Two-line paragraph used as file content.  The single %f placeholder is
# filled with a random number so that generated files differ from each other.
TEXT = "\n".join((
    "This is an autogenerated text, which would be repated many times",
    "This is a random number %f to make files different",
    "",
    "",
))
def main():
    """Fill a directory with N generated text files of randomized size.

    Command-line arguments:
        DIR         directory to fill (created if it does not exist)
        N           number of files to create
        -s/--size   average file size, parsed by ``parse_size`` (default ``1m``)
        -v/--vary   allowed deviation from the average size, in percent
                    (default 15)
        -e/--ext    extension given to every generated file (default ``txt``)

    Files are named ``<index>.<ext>``.  Each file repeats ``TEXT`` formatted
    with its own random number, truncated to a size drawn uniformly from
    ``size * (1 - vary/100)`` .. ``size * (1 + vary/100)``.
    """
    parser = argparse.ArgumentParser(description='Generate files.')
    parser.add_argument('dir', metavar="DIR", help='a directory path to fill')
    parser.add_argument('n', metavar="N", type=int, help='number of files')
    parser.add_argument('-s', '--size', type=parse_size, default='1m',
                        help='average file size')
    parser.add_argument('-v', '--vary', type=int, default=15,
                        help='file size variation percent')
    parser.add_argument('-e', '--ext', default='txt', help='file extension')
    args = parser.parse_args()

    # A negative percentage would put the lower bound above the upper one and
    # make random.randint() fail with a bare traceback; report it as a usage
    # error instead.
    if args.vary < 0:
        parser.error("--vary must not be negative")

    # The bounds do not change between files, so compute them once.  The lower
    # bound is clamped at zero: with --vary above 100 it would otherwise go
    # negative and a negative "size" could be drawn.
    min_size = max(0, int(args.size * (1 - args.vary / 100)))
    max_size = int(args.size * (1 + args.vary / 100))

    os.makedirs(args.dir, exist_ok=True)

    for i in tqdm(range(args.n)):
        # Every file gets its own random number baked into the text, so the
        # chunk has to be rebuilt per file.
        sample_bytes = (TEXT % random.random()).encode()
        chunk = sample_bytes * (CHUNK_SIZE // len(sample_bytes))

        path = os.path.join(args.dir, f"{i}.{args.ext}")
        with open(path, "wb") as fd:
            size = random.randint(min_size, max_size)
            whole_chunks, tail = divmod(size, len(chunk))
            for _ in range(whole_chunks):
                fd.write(chunk)
            # Top up with a partial chunk to hit the exact requested size.
            if tail:
                fd.write(chunk[:tail])
# Multipliers, in bytes, for the size suffixes accepted by parse_size().
UNITS = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}
# Single-letter aliases for the two-letter units above.
UNITS["K"] = UNITS["KB"]
UNITS["M"] = UNITS["MB"]
UNITS["G"] = UNITS["GB"]
UNITS["T"] = UNITS["TB"]

# A non-negative decimal number, optional whitespace, optional alphabetic unit.
_SIZE_RE = re.compile(r'(\d+(?:\.\d*)?|\.\d+)\s*([A-Za-z]+)?')


def parse_size(size):
    """Convert a human-readable size such as ``"1m"`` or ``"1.5 KB"`` to bytes.

    Args:
        size: a string holding a non-negative decimal number, optionally
            followed by a unit from ``UNITS`` (case-insensitive).  A missing
            unit means bytes.

    Returns:
        The size in bytes as an ``int``; fractional bytes are truncated.

    Raises:
        ValueError: if the string is not a number with an optional unit, or
            the unit is not listed in ``UNITS``.  ValueError is used so that
            argparse reports it as a usage error when this function serves
            as an argument ``type``.
    """
    # fullmatch: the whole string must be a size, so stray characters around
    # the number are rejected rather than silently ignored.
    match = _SIZE_RE.fullmatch(size.strip())
    if match is None:
        raise ValueError(f"invalid size: {size!r}")

    number, unit = match.groups()
    try:
        multiplier = UNITS[(unit or 'B').upper()]
    except KeyError:
        raise ValueError(f"unknown size unit {unit!r} in {size!r}") from None

    return int(float(number) * multiplier)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment