Skip to content

Instantly share code, notes, and snippets.

@joaovarelas
Last active May 23, 2023 19:41
Show Gist options
  • Save joaovarelas/42fc3f134cce986cc40825ef79e64425 to your computer and use it in GitHub Desktop.
Save joaovarelas/42fc3f134cce986cc40825ef79e64425 to your computer and use it in GitHub Desktop.
from faker import Faker
import random
import string
import concurrent.futures
# Leading bytes written at the start of every generated file, keyed by the
# extension the file is saved under. Insertion order is significant only in
# that random.choice() draws from the keys in this order.
extensions = {
    'pdf': b'%PDF-1.',
    'jpg': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01',
    # PNG signature followed by the beginning of an IHDR chunk.
    'png': (
        b'\x89PNG\r\n\x1a\n'
        b'\x00\x00\x00\rIHDR'
        b'\x00\x00\x00\x01\x00\x00\x00\x01\x08'
    ),
    'zip': b'PK\x03\x04',
    # NOTE(review): an earlier candidate, b'ustar\x0000', was disabled in
    # favour of these two bytes, which look like the gzip magic number --
    # confirm that is what '.tar' files are meant to start with.
    'tar': b'\x1f\x8b',
    'doc': b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1',
    'docx': b'PK\x03\x04\x14\x00\x06\x00',
}
# Single Faker instance shared by every call to generate_fake_filename().
fake = Faker()


def generate_fake_filename(ext):
    """Return a file name produced by Faker for the given extension.

    Args:
        ext: Extension passed to ``fake.file_name(extension=...)``.

    Returns:
        The generated name, formatted as a string.
    """
    return '{}'.format(fake.file_name(extension=ext))
def generate_random_content(extension, min_size=5 * 1024,
                            max_size=int(1.2 * 1024 * 1024)):
    """Build random file content that starts with the extension's magic bytes.

    The total length is drawn uniformly from ``[min_size, max_size]``; the
    signature registered for ``extension`` in the module-level ``extensions``
    table comes first and the rest is random filler.

    Args:
        extension: Key into the ``extensions`` table (e.g. ``'pdf'``).
        min_size: Smallest total size in bytes (default 5 KB).
        max_size: Largest total size in bytes (default 1.2 MB).

    Returns:
        A ``bytes`` object beginning with the signature. If the drawn size is
        not larger than the signature, the signature alone is returned.

    Raises:
        KeyError: If ``extension`` is not present in ``extensions``.
        ValueError: If ``min_size`` is greater than ``max_size``.
    """
    content_size = random.randint(min_size, max_size)
    signature = extensions[extension]
    remaining_size = content_size - len(signature)
    if remaining_size <= 0:
        # Nothing left to fill; also avoids asking getrandbits() for 0 bits,
        # which older Python versions reject.
        return signature
    # One big random integer converted to bytes replaces a Python-level loop
    # that produced the filler one byte at a time (over a million iterations
    # for the largest files). Still driven by the `random` module, so
    # random.seed() keeps the output reproducible.
    filler = random.getrandbits(8 * remaining_size).to_bytes(remaining_size, 'big')
    return signature + filler
def generate():
    """Create one file of random bytes whose header matches its extension.

    Picks a random key from ``extensions``, obtains a file name from
    ``generate_fake_filename`` and content from ``generate_random_content``,
    then writes the content to that name in binary mode. Returns ``None``.
    """
    print("Starting generate() worker...")
    ext = random.choice(list(extensions))
    target_name = generate_fake_filename(ext)
    payload = generate_random_content(ext)
    # 'wb' truncates any existing file that happens to share the same name.
    with open(target_name, 'wb') as out:
        out.write(payload)
# Fan the work out over a thread pool: 100 generate() tasks, each writing one
# file of random bytes whose leading magic bytes match its extension.
with concurrent.futures.ThreadPoolExecutor(max_workers=64) as executor:
    n_files = 100
    futures = [executor.submit(generate) for _ in range(n_files)]
    # Collect the tasks as they finish; result() re-raises any exception that
    # occurred inside the corresponding worker.
    for future in concurrent.futures.as_completed(futures):
        result = future.result()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment