Skip to content

Instantly share code, notes, and snippets.

@DavideGalilei
Created November 18, 2023 20:15
Show Gist options
  • Save DavideGalilei/f81ea1cebd0fd45a538758279577b007 to your computer and use it in GitHub Desktop.
Save DavideGalilei/f81ea1cebd0fd45a538758279577b007 to your computer and use it in GitHub Desktop.
Dictiofuzzy - recursive words meaning stats
import re
import json
import random
from rich.console import Console
from rich.markup import escape
# Load the word -> definition mapping used by analyze(). JSON text is UTF-8 by
# specification, so decode it explicitly instead of relying on the platform's
# default locale encoding.
with open('dictionary_compact.json', 'r', encoding='utf-8') as json_file:
    dictionary = json.load(json_file)

# A "word" is a run of ASCII letters and apostrophes (matched case-insensitively).
WORD_RE = re.compile(r"[a-z']+", re.UNICODE | re.IGNORECASE)

# Words that were found in the dictionary while analyzing.
known_words: set[str] = set()
# Words that were encountered but have no dictionary entry.
unknown_words: set[str] = set()

# Shared rich console used for status updates and the final report.
console = Console()
# Flip to True to echo trace messages through the rich console.
DEBUG_ENABLED = False


def debug(*args, **kwargs):
    """Print a trace message via ``console.print`` when debugging is enabled.

    All positional and keyword arguments are forwarded unchanged. While
    ``DEBUG_ENABLED`` is False (the default) the call does nothing.
    """
    if DEBUG_ENABLED:
        console.print(*args, **kwargs)
def analyze(sentence: str) -> None:
    """Collect every word needed to understand *sentence*, transitively.

    Each word of the sentence is looked up in ``dictionary``; the words of its
    definition are then looked up in turn, and so on until nothing new turns
    up. Words with a dictionary entry are added to the module-level
    ``known_words`` set, words without one to ``unknown_words``.

    Args:
        sentence: Free text; it is lowercased and split with ``WORD_RE``.

    Returns:
        None. Results are accumulated in ``known_words`` / ``unknown_words``.
    """
    # Iterative traversal with an explicit stack, so deep definition chains
    # cannot hit the recursion limit.
    pending = WORD_RE.findall(sentence.lower())
    processed = 0
    with console.status("Analyzing...") as status:
        while pending:
            processed += 1
            # Refresh the spinner text only every 100 pops to keep overhead low.
            if processed % 100 == 0:
                status.update(f"[bold blue]Total: {processed}[reset] [green]|[reset] [bold]Analyzing...[reset] {len(pending)} words left + {len(unknown_words)} unknown words")

            # Everything pushed on the stack is already lowercase.
            word = pending.pop()
            if word in known_words or word in unknown_words:
                continue

            if word not in dictionary:
                debug(f"Storing '{word}' in unknown_words, total: {len(unknown_words)}")
                unknown_words.add(word)
                continue

            known_words.add(word)
            definition = dictionary[word]
            debug("\n[bold red]" + word)
            debug(escape(definition))

            # Push in reverse so the definition's first word is popped first.
            # Words that are already classified would be skipped on pop anyway,
            # so they are not queued again.
            pending.extend(
                candidate
                for candidate in reversed(WORD_RE.findall(definition.lower()))
                if candidate not in known_words and candidate not in unknown_words
            )
# Script entry point: prompt for a sentence, analyze it, and print a summary.
if __name__ == "__main__":
    sentence = input("Enter a sentence: ")
    analyze(sentence)

    # Sort into lists: random.sample() needs a sequence, and a sorted order
    # keeps the population deterministic for a given result set.
    known_words = sorted(known_words)
    unknown_words = sorted(unknown_words)

    # The sentence is user input, so escape it before embedding it in rich
    # markup; otherwise square brackets in the input would be parsed as tags.
    console.print(f":sparkles: To totally understand [bold blue]'{escape(sentence)}'[reset], you need to know: [green bold]{len(known_words)}[reset] words, plus [red bold]{len(unknown_words)}[reset] unknown words.")

    # random.sample() raises ValueError when k exceeds the population size, so
    # clamp the sample size for short results.
    console.print(f":bulb: Random known words: {random.sample(known_words, k=min(5, len(known_words)))}", highlight=True)
    console.print(f":fire: Random unknown words: {random.sample(unknown_words, k=min(5, len(unknown_words)))}", highlight=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment