Skip to content

Instantly share code, notes, and snippets.

@mawillcockson
Created July 13, 2021 07:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mawillcockson/e5641f6b3b0ecd0099d1c7942455e609 to your computer and use it in GitHub Desktop.
Save mawillcockson/e5641f6b3b0ecd0099d1c7942455e609 to your computer and use it in GitHub Desktop.
Clean up Typey Type progress output
import json
import string
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Dict, List
# Path of the progress file to clean up. `resolve(strict=True)` raises
# FileNotFoundError immediately if the file does not exist, so the script
# stops before anything is renamed or written.
PROGRESS_FILE = (
    Path("~/.config/plover/typey-type_progress.json").expanduser().resolve(strict=True)
)
# Mapping of recorded entry -> count. JSON text is UTF-8, so decode it
# explicitly instead of relying on the platform's default text encoding.
entries: Dict[str, int] = json.loads(PROGRESS_FILE.read_text(encoding="utf-8"))
# Expand the {entry: count} mapping into one list item per counted occurrence,
# so that entries which clean up to the same word can be tallied together.
# Counter.elements() skips entries whose count is less than one.
words: List[str] = list(Counter(entries).elements())

# Punctuation and whitespace characters are deleted from every word, except
# for the characters listed in CHARACTER_EXCEPTIONS.
CHARACTER_EXCEPTIONS = {"-"}
characters_to_remove = "".join(
    set(string.punctuation + string.whitespace) - CHARACTER_EXCEPTIONS
)
translation_table = str.maketrans("", "", characters_to_remove)

# Words that keep a specific capitalization, keyed by their case-folded form.
CAPITALIZATION_EXCEPTIONS = {
    "paul": "Paul",
    "french": "French",
}


def _clean_word(word: str) -> str:
    """Return *word* trimmed, without punctuation/whitespace, and case-normalized."""
    # strip() runs first because it also trims Unicode whitespace that is not
    # part of string.whitespace and so is not covered by the translation table.
    cleaned = word.strip().translate(translation_table).casefold()
    return CAPITALIZATION_EXCEPTIONS.get(cleaned, cleaned)


# Clean every word in a single pass. An entry consisting only of removed
# characters cleans to the empty string; that is not a word, so it is dropped
# rather than being counted under an empty key.
words = [cleaned for cleaned in map(_clean_word, words) if cleaned]
# Make a backup: move the original file aside under a timestamped name.
# The timestamp is separated from the original file name by a dot and contains
# no colons, which are not allowed in file names on Windows.
backup_timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
old_progress_file = PROGRESS_FILE.with_name(
    f"{PROGRESS_FILE.name}.{backup_timestamp}.bak"
)
# Raise explicitly instead of using `assert`, which is removed when Python runs
# with -O. Without this check, rename() silently replaces an existing file on
# POSIX systems, which would destroy an earlier backup.
if old_progress_file.exists():
    raise FileExistsError(f"'{old_progress_file}' already exists; please rename it")
PROGRESS_FILE.rename(old_progress_file)
# Tally the cleaned words, then serialize the totals as JSON and write them
# to the progress file's path.
counts = Counter(words)
cleaned_progress = dict(counts)
serialized_progress = json.dumps(cleaned_progress)
PROGRESS_FILE.write_text(serialized_progress)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment