Skip to content

Instantly share code, notes, and snippets.

@j9ac9k
Created April 10, 2021 21:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save j9ac9k/7cddc9025bb7040f888bf1e9c9fdde1c to your computer and use it in GitHub Desktop.
Save j9ac9k/7cddc9025bb7040f888bf1e9c9fdde1c to your computer and use it in GitHub Desktop.
Generate word lists from Python modules, for use with spell checkers such as cSpell.
import argparse
import importlib
import inspect
from collections import deque
from typing import Any, List, Set, Tuple
def get_nested_members(module, predicate=None) -> List[Tuple[str, Any]]:
    """Return the public ``(name, value)`` members of *module*.

    Members are collected with ``inspect.getmembers`` (optionally narrowed by
    *predicate*); every entry whose name begins with an underscore is dropped.
    """
    public_members = []
    for entry in inspect.getmembers(module, predicate=predicate):
        member_name = entry[0]
        if member_name.startswith("_"):
            # Private / dunder names are not wanted in the word list.
            continue
        public_members.append(entry)
    return public_members
def write_keywords(words: Set[str], module):
    """Write *words*, sorted, one per line to ``<module.__name__>.txt``.

    The file is created relative to the current working directory, replacing
    any existing file of the same name, and always ends with a newline.

    Args:
        words: The words to write.
        module: Object whose ``__name__`` supplies the output file's stem.
    """
    ordered = sorted(words)
    # Encode explicitly: identifiers may be non-ASCII, and the platform's
    # default encoding (e.g. cp1252) may be unable to represent them.
    with open(f"{module.__name__}.txt", "wt", encoding="utf-8") as words_file:
        words_file.write("\n".join(ordered))
        words_file.write("\n")
def make_cspell_words(words: List[str]) -> Set[str]:
    """Turn identifiers into a set of lower-cased spell-checker words.

    Each identifier is split on underscores; fragments shorter than four
    characters are discarded and the rest are lower-cased.

    Args:
        words: Identifiers to process. The argument is only iterated and is
            never modified, so any iterable of strings is accepted.

    Returns:
        The set of lower-cased fragments that are at least four characters long.
    """
    processed = set()
    for identifier in words:
        # A single split removes every underscore, so no re-queueing of the
        # fragments (and no mutation of the caller's list) is needed.
        for fragment in identifier.split("_"):
            if len(fragment) >= 4:
                processed.add(fragment.lower())
    return processed
def main(args):
    """Collect spell-checker words from a module tree and write them to disk.

    Imports ``args.module`` and walks it and its nested submodules
    breadth-first. For every visited module it gathers the public member names
    and the parameter names of public functions and methods, then writes the
    combined word set with ``write_keywords``.

    Args:
        args: Parsed command-line namespace; only ``args.module`` (the dotted
            name of the module to scan) is read.
    """
    root_name = args.module
    top_level_module = importlib.import_module(root_name)
    words = {root_name}
    frontier = deque([top_level_module])
    already_seen = set()

    def is_function_or_method(obj) -> bool:
        # ``inspect.isfunction or inspect.ismethod`` evaluates to just
        # ``inspect.isfunction``; the two checks must be combined per object
        # so that bound methods exposed at module level are also inspected.
        return inspect.isfunction(obj) or inspect.ismethod(obj)

    while frontier:
        module = frontier.popleft()
        module_name = module.__name__
        # Require the root itself or a dotted descendant; a bare prefix test
        # would also accept unrelated packages such as "foobar" for "foo".
        belongs_to_root = module_name == root_name or module_name.startswith(
            f"{root_name}."
        )
        if module in already_seen or not belongs_to_root:
            continue
        already_seen.add(module)

        # get modules and add to the frontier
        frontier.extend(
            submodule
            for _, submodule in get_nested_members(module, predicate=inspect.ismodule)
        )

        # add as many key-words as we can identify
        member_names = [member_name for member_name, _ in get_nested_members(module)]
        words.update(make_cspell_words(member_names))

        # get as many kwargs as we can get to
        kwargs = []
        for _, func_or_method in get_nested_members(
            module, predicate=is_function_or_method
        ):
            try:
                kwargs.extend(inspect.signature(func_or_method).parameters)
            except ValueError:
                # no signature can be provided; skip this callable
                pass
            except TypeError:
                # object is not supported
                print(f"TypeError for {func_or_method}")
        words.update(make_cspell_words(kwargs))

    write_keywords(words, top_level_module)
if __name__ == "__main__":
    # Script entry point: the module to scan is the single, mandatory option;
    # the parsed namespace is handed straight to main().
    module_option = {
        "required": True,
        "help": "Required Module to Extract keywords from",
    }
    cli = argparse.ArgumentParser()
    cli.add_argument("-m", "--module", **module_option)
    main(cli.parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment