Skip to content

Instantly share code, notes, and snippets.

@hyzyla
Created December 1, 2022 18:27
Show Gist options
  • Save hyzyla/6918e5d0e02f29ca5c183939e7091c5a to your computer and use it in GitHub Desktop.
Save hyzyla/6918e5d0e02f29ca5c183939e7091c5a to your computer and use it in GitHub Desktop.
Script that finds unwrapped strings in the Ukrainian language that still need to be marked for translation
#!/usr/bin/env python3
# flake8: noqa
"""
Script that finds unwrapped strings in the Ukrainian language that still need to be marked for translation
"""
import ast
import re
import textwrap
from pathlib import Path
from typing import List, Any, Dict, Iterator
# Matches text containing at least one Cyrillic letter (the а-я / А-Я ranges
# plus the Ukrainian-specific є, і, ї, ґ in both cases).
# DOTALL lets `.` span line breaks, so text whose Cyrillic letters appear only
# after a newline is still detected when the pattern is used with `.match()`.
UKRAINIAN_TEXT_RE = re.compile(r'.*[а-яА-ЯєЄІіЇїҐґ]+.*', re.DOTALL)

# Directories that are searched recursively for .py files.
PY_MODULES = [
    'src',
]

# Path patterns of files that must be skipped.
IGNORE_PATTERNS = [
    # Ignore tests
    '**/test_*',
    '**/tests/*',
]
class Analyzer(ast.NodeVisitor):
    """AST visitor that collects Ukrainian string nodes not wrapped in ``_``.

    At most one node is remembered per source line: the first one visited.
    """

    def __init__(self) -> None:
        # Keyed by line number; the value is the node recorded for that line.
        self.unwrapped_map: Dict[int, ast.AST] = {}

    @property
    def unwrapped(self) -> List[ast.AST]:
        """Return the collected nodes ordered by ascending line number."""
        ordered = sorted(self.unwrapped_map.items(), key=lambda item: item[0])
        return [found for _lineno, found in ordered]

    def visit_ua_text(self, node: ast.AST) -> None:
        """Record the node if it is unwrapped Ukrainian text, then descend."""
        # The check runs on the node's source representation.
        source_text: str = ast.unparse(node)
        needs_wrapping = is_ua_text(source_text) and not is_wrapped(node)
        if needs_wrapping:
            # Keep the first node seen for a line; later ones are ignored.
            if node.lineno not in self.unwrapped_map:
                self.unwrapped_map[node.lineno] = node
        self.generic_visit(node)

    def visit_JoinedStr(self, node: ast.JoinedStr) -> None:
        """Handle f-strings."""
        self.visit_ua_text(node)

    def visit_Constant(self, node: ast.Constant) -> None:
        """Handle constant literals."""
        self.visit_ua_text(node)
def is_ua_text(value: Any) -> bool:
    """Tell whether ``value`` is a string matched by UKRAINIAN_TEXT_RE."""
    if not isinstance(value, str):
        return False
    return UKRAINIAN_TEXT_RE.match(value) is not None
def is_wrapped(node: ast.AST) -> bool:
    """Check if the node is wrapped by the translate function ``_``.

    A node is wrapped when its enclosing expression is a call whose callee is
    the bare name ``_``. Nodes that live inside an f-string (its literal parts
    and the expressions placed directly in its replacement fields) are judged
    by the f-string's own wrapper, so the pieces of ``_(f"...")`` are not
    reported separately as unwrapped.

    The check reads the ``parent`` attribute of nodes; a node without that
    attribute is treated as unwrapped.

    Args:
        node: AST node to inspect.

    Returns:
        True if the node is wrapped by a ``_(...)`` call, False otherwise.
    """
    parent = getattr(node, 'parent', None)
    # Climb out of f-string internals so that a part of `_(f"...")` is
    # compared against the call that wraps the whole f-string.
    while isinstance(parent, (ast.JoinedStr, ast.FormattedValue)):
        parent = getattr(parent, 'parent', None)
    if not isinstance(parent, ast.Call):
        return False
    func = parent.func
    return isinstance(func, ast.Name) and func.id == '_'
def is_ignored_filepath(filepath: Path) -> bool:
    """Check whether the given file path is excluded by IGNORE_PATTERNS.

    ``Path.match`` matches ``*`` against exactly one path segment (and does
    not treat ``**`` recursively), so a pattern such as ``**/tests/*`` on its
    own misses files nested more than one level below a ``tests`` directory.
    Each pattern is therefore tested against the path itself and against every
    ancestor directory: a file is ignored as soon as it, or any directory that
    contains it, matches a pattern.

    Args:
        filepath: Path of the file to check.

    Returns:
        True if the file must be skipped, False otherwise.
    """
    candidates = (filepath, *filepath.parents)
    return any(
        candidate.match(ignore_pattern)
        for ignore_pattern in IGNORE_PATTERNS
        for candidate in candidates
    )
def get_python_filenames() -> Iterator[Path]:
    """Yield every .py file found recursively under each PY_MODULES entry."""
    for module_dir in PY_MODULES:
        yield from Path(module_dir).rglob('*.py')
def get_sources_filenames() -> Iterator[Path]:
    """
    Yield the project's .py files, leaving out every path that
    is_ignored_filepath rejects.
    """
    yield from (
        candidate
        for candidate in get_python_filenames()
        if not is_ignored_filepath(candidate)
    )
def attach_parent_node(tree: ast.AST) -> ast.AST:
    """Give every child node a ``parent`` attribute and return the same tree.

    The root node itself receives no ``parent`` attribute.
    """
    for owner in ast.walk(tree):
        for descendant in ast.iter_child_nodes(owner):
            setattr(descendant, 'parent', owner)
    return tree
def get_unwrapped_messages() -> None:
    """Print every unwrapped Ukrainian string found in the project sources.

    Each source file returned by get_sources_filenames() is parsed and
    analyzed. Every finding is printed as ``<file>:<line> <snippet>``, where
    the snippet is the node's source shortened to 80 characters. A blank line
    is printed after each file that had findings.
    """
    for filename in get_sources_filenames():
        # Hand the parser raw bytes so Python's own source-encoding rules
        # apply (UTF-8 by default, BOM, coding declaration) instead of the
        # platform locale encoding that read_text() would use.
        source: bytes = filename.read_bytes()
        # Passing the file name makes syntax errors point at the right file.
        source_tree: ast.AST = ast.parse(source, filename=str(filename))
        source_tree = attach_parent_node(source_tree)

        analyzer = Analyzer()
        analyzer.visit(source_tree)

        unwrapped_nodes = analyzer.unwrapped
        for unwrapped in unwrapped_nodes:
            unwrapped_value = ast.unparse(unwrapped)
            unwrapped_value = textwrap.shorten(
                text=unwrapped_value,
                width=80,
                placeholder="...",
            )
            print(f'{filename}:{unwrapped.lineno} {unwrapped_value}')

        # create space between files with unwrapped nodes
        if unwrapped_nodes:
            print('')
# Script entry point: announce the scan, then print the findings.
if __name__ == "__main__":
    print("⌛ Extracting unwrapped messages ...")
    get_unwrapped_messages()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment