Created
January 25, 2023 12:36
-
-
Save Guiorgy/24aaf83aa1e665805adf17640acdb911 to your computer and use it in GitHub Desktop.
A Python generator that takes an alphabet string and a word length, and lazily generates the words (character combinations from the alphabet) whose indices fall within the requested start/stop range (both ends inclusive).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import math | |
from typing import Optional, Generator | |
def dictionary_generator(alphabet: str, word_length: int, start: Optional[int] = None, stop: Optional[int] = None) -> Generator[str, None, None]:
    """Lazily yield every ``word_length``-character word over ``alphabet``.

    Words are ordered like numbers written in base ``len(alphabet)``: the
    characters of ``alphabet`` are the digits (in the order given), the
    rightmost position changes fastest, and word number 0 consists of
    ``alphabet[0]`` repeated ``word_length`` times.

    Args:
        alphabet: Non-empty string whose characters are the available symbols.
        word_length: Number of characters in every generated word (>= 1).
        start: Index of the first word to yield; ``None`` means 0.
        stop: Index of the last word to yield (inclusive); ``None`` means
            the last word of the dictionary.

    Yields:
        The words with indices ``start`` through ``stop``, in order.

    Raises:
        ValueError: If ``alphabet`` is empty, ``word_length`` is not
            positive, or ``start``/``stop`` fall outside the dictionary.
            Because this is a generator function, the error surfaces when
            the first item is requested, not when the function is called.

    Example:
        >>> list(dictionary_generator('abc', 2, start=4, stop=6))
        ['bb', 'bc', 'ca']
    """
    if not alphabet:
        raise ValueError("alphabet can't be empty")
    if word_length < 1:
        raise ValueError('word_length has to be positive')

    alphabet_size = len(alphabet)
    dictionary_size = alphabet_size ** word_length

    if start is not None and not (0 <= start < dictionary_size):
        raise ValueError(f'start needs to be between 0 and dictionary_size({dictionary_size})')
    first = start if start else 0
    if stop is not None and not (first <= stop < dictionary_size):
        raise ValueError(f'stop needs to be between start and dictionary_size({dictionary_size})')
    last = dictionary_size - 1 if stop is None else stop

    # Decompose `first` into base-`alphabet_size` digits, least significant
    # digit last. divmod keeps everything in exact integer arithmetic; float
    # division would lose low digits above 2**53 and overflow for huge indices.
    digits = [0] * word_length
    position = word_length - 1
    remaining = first
    while remaining:
        remaining, digits[position] = divmod(remaining, alphabet_size)
        position -= 1

    last_index = word_length - 1
    words_left = last - first + 1  # `stop` is inclusive, so at least one word

    while True:
        # The prefix only changes when the last position wraps around, so it
        # is rebuilt once per sweep of the final character.
        prefix = ''.join(alphabet[digit] for digit in digits[:last_index])
        for char in alphabet[digits[last_index]:]:
            yield prefix + char
            words_left -= 1
            if not words_left:
                return
        digits[last_index] = 0

        # Propagate the carry leftwards, like an odometer rolling over.
        position = last_index - 1
        while position >= 0 and digits[position] == alphabet_size - 1:
            digits[position] = 0
            position -= 1
        if position < 0:
            # Every position wrapped: the dictionary is exhausted.
            return
        digits[position] += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment