# Requires dictionary_compact.json, available from:
#   https://github.com/matthewreagan/WebstersEnglishDictionary/blob/master/dictionary_compact.json
# Install the dependency with:
#   $ pip install rich
import re | |
import json | |
import random | |
from rich.console import Console | |
from rich.markup import escape | |
# Load the dictionary; the code below uses it as a mapping from a word to the
# text of its definition. JSON files are UTF-8, so pin the encoding instead of
# relying on the platform default, which is not UTF-8 on every system.
with open('dictionary_compact.json', 'r', encoding='utf-8') as json_file:
    dictionary = json.load(json_file)

# Matches runs of the letters a-z and apostrophes, case-insensitively.
WORD_RE = re.compile(r"[a-z']+", re.UNICODE | re.IGNORECASE)

# Words encountered during analysis that have a dictionary entry.
known_words: set[str] = set()
# Words encountered during analysis that have no dictionary entry.
unknown_words: set[str] = set()

# Shared Rich console used for the status spinner and the final report.
console = Console()
def debug(*args, **kwargs):
    """Tracing hook that is currently switched off.

    Accepts any positional and keyword arguments and ignores them. Uncomment
    the ``console.print`` line below to see the trace output.
    """
    # console.print(*args, **kwargs)
    return None
def analyze(sentence: str):
    """Collect, transitively, every word needed to understand *sentence*.

    Each word of the sentence is looked up in ``dictionary``. A word with an
    entry is added to ``known_words`` and the words of its definition are
    queued for the same treatment; a word without an entry is added to
    ``unknown_words``. Both sets are module-level and are updated in place.

    Args:
        sentence: Free text; it is lowercased and split with ``WORD_RE``.

    Returns:
        None. Results are left in ``known_words`` and ``unknown_words``.
    """
    # Iterative walk with an explicit stack, so deep definition chains cannot
    # hit the recursion limit.
    pending = WORD_RE.findall(sentence.lower())
    processed = 0
    with console.status("Analyzing...") as status:
        while pending:
            processed += 1
            if processed % 100 == 0:
                # Refresh the spinner text only occasionally to keep it cheap.
                status.update(
                    f"[bold blue]Total: {processed}[reset] [green]|[reset] "
                    f"[bold]Analyzing...[reset] {len(pending)} words left "
                    f"+ {len(unknown_words)} unknown words"
                )

            # Everything on the stack is already lowercase: both the sentence
            # and every definition are lowercased before being tokenized.
            word = pending.pop()
            if word in known_words or word in unknown_words:
                continue

            if word not in dictionary:
                debug(f"Storing '{word}' in unknown_words, total: {len(unknown_words)}")
                unknown_words.add(word)
                # raise ValueError(f"Word '{word}' not found in dictionary")
                continue

            known_words.add(word)
            definition = dictionary[word]
            debug("\n[bold red]" + word)
            debug(escape(definition))

            # Push in reverse so the first word of the definition is popped
            # (and therefore processed) first. Words that are already
            # classified are not queued again.
            for candidate in reversed(WORD_RE.findall(definition.lower())):
                if candidate not in known_words and candidate not in unknown_words:
                    pending.append(candidate)
# Script driver: read a sentence, expand it through the dictionary, and report
# how many words are needed to understand it.
sentence = input("Enter a sentence: ")
analyze(sentence)

# random.sample() requires a sequence, so turn the result sets into sorted
# lists before sampling from them.
known_words = sorted(known_words)
unknown_words = sorted(unknown_words)

# The sentence is user input: escape it so that text such as "[bold]" is shown
# literally instead of being interpreted as Rich markup.
console.print(f":sparkles: To totally understand [bold blue]'{escape(sentence)}'[reset], you need to know: [green bold]{len(known_words)}[reset] words, plus [red bold]{len(unknown_words)}[reset] unknown words.")

# Never request more samples than there are words; random.sample() raises
# ValueError when k exceeds the population size (e.g. no unknown words).
known_sample = random.sample(known_words, k=min(5, len(known_words)))
unknown_sample = random.sample(unknown_words, k=min(5, len(unknown_words)))
console.print(f":bulb: Random known words: {known_sample}", highlight=True)
console.print(f":fire: Random unknown words: {unknown_sample}", highlight=True)