Skip to content

Instantly share code, notes, and snippets.

@themiurgo
Created May 29, 2014 13:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save themiurgo/868268025a3e7e3b55a0 to your computer and use it in GitHub Desktop.
Save themiurgo/868268025a3e7e3b55a0 to your computer and use it in GitHub Desktop.
Word Generator
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""This script was used to generate random test files for the course
of Computer Networks 2014 at the University of Birmingham, Computer Science.
Author: Antonio Lima
License: WTFPL
"""
from __future__ import print_function
import collections
import itertools
import random
import string
import sys
def default_lengths(min_length=3, max_length=10):
    """Yield an endless stream of random word lengths.

    Every value is drawn with ``random.randint``, so ``min_length`` and
    ``max_length`` are both inclusive bounds. The generator never stops
    on its own; the consumer decides when to quit pulling values.
    """
    # ``itertools.repeat(None)`` is just an unbounded loop driver.
    for _ in itertools.repeat(None):
        yield random.randint(min_length, max_length)
def random_word_length(alphabet, length):
    """Return a random word of exactly ``length`` characters.

    Each character is picked independently with ``random.choice`` from
    ``alphabet`` (any non-empty sequence of characters). A ``length`` of
    zero or less produces the empty string.
    """
    # ``range`` (rather than the Python 2-only ``xrange``) keeps this
    # runnable on both Python 2 and Python 3; lengths are small, so the
    # temporary list built on Python 2 is negligible.
    return ''.join(random.choice(alphabet) for _ in range(length))
def random_words(alphabet=None, lengths=None):
    """Yield random words, one per value pulled from ``lengths``.

    Args:
        alphabet: Sequence of characters to draw from. When omitted,
            ``string.letters`` is used where it exists (Python 2) and
            ``string.ascii_letters`` otherwise (Python 3).
        lengths: Iterable of word lengths. When omitted, a fresh
            ``default_lengths()`` generator is used, which is endless.

    The global ``random`` generator is re-seeded with a fixed value when
    iteration starts, so the produced sequence is reproducible from run
    to run on the same interpreter.
    """
    # Defaults are resolved at call time: ``string.letters`` does not
    # exist on Python 3, and a generator created in the signature would
    # be a single object shared by every call.
    if alphabet is None:
        alphabet = getattr(string, "letters", string.ascii_letters)
    if lengths is None:
        lengths = default_lengths()
    # Fixed seed: the generated test files must be identical every time.
    random.seed("networks2014")
    for length in lengths:
        yield random_word_length(alphabet, length)
def random_words_repetition(repetition_probability, words):
    """Re-emit ``words``, occasionally injecting an already-seen word.

    Every incoming word is remembered in a bounded window (at most
    200000 entries) and passed through. After each word, with
    probability ``repetition_probability``, the oldest word still in the
    window is removed from it and yielded as well.
    """
    window = collections.deque(maxlen=200000)
    for current in words:
        window.append(current)
        yield current
        if random.random() >= repetition_probability:
            continue
        # The window is never empty here: ``current`` was just appended.
        yield window.popleft()
def size_capper(total_size, words):
    """Pass ``words`` through until their cumulative size exceeds a limit.

    A word costs its UTF-8 encoded length plus one byte for the line
    terminator. The word is yielded before its cost is counted, so the
    word that pushes the running total past ``total_size`` is still
    emitted and the output ends right after it.
    """
    emitted_bytes = 0
    for item in words:
        yield item
        # +1 accounts for the end-of-line character written after the word.
        emitted_bytes = emitted_bytes + len(item.encode('utf-8')) + 1
        if emitted_bytes > total_size:
            return
def main(argv=None):
    """Write random words to STDOUT until the requested size is reached.

    Args:
        argv: Optional argument vector laid out like ``sys.argv``
            (program name first, then ``<bytes>`` and
            ``<repetition_probability>``). Defaults to ``sys.argv``.

    Returns:
        0 on success, or 1 after printing the usage text to STDERR when
        the arguments are missing, malformed or out of range.
    """
    usage = (
        "Usage:\n"
        "wordgenerator <bytes> <repetition_probability>\n"
        "The script outputs to STDOUT. Repetition probability must be between\n"
        "0 and 1.\n"
    )
    if argv is None:
        argv = sys.argv
    try:
        size = int(argv[1])
        prob = float(argv[2])
    except (IndexError, ValueError):
        print(usage, file=sys.stderr)
        return 1
    # Enforce the contract stated in the usage text. The chained
    # comparison is also False for NaN, so "nan" is rejected as well.
    if size < 0 or not 0.0 <= prob <= 1.0:
        print(usage, file=sys.stderr)
        return 1
    wordlist = random_words()
    wordlist_with_repetition = random_words_repetition(prob, wordlist)
    capped_wordlist = size_capper(size, wordlist_with_repetition)
    for word in capped_wordlist:
        print(word, file=sys.stdout)
    return 0
# Script entry point: run the generator and hand main()'s return value
# to the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment