Last active
January 9, 2023 08:31
-
-
Save muzimuzhi/70e31826e4efe351ddd03fe1edff3410 to your computer and use it in GitHub Desktop.
Remove duplicate keyvals groups in the TeXstudio completion file `tcolorbox.cwl`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import itertools
import os
import re
from typing import Dict, List, Tuple
# Directory that contains the ``texstudio`` checkout; replace the placeholder
# with a real path before running the script.
CWL_PATH = "your path"
# Base path of the completion file.  The script reads ``CWL_FILE + "-old"``
# and writes ``CWL_FILE + "-new"``.  ``os.path.join`` inserts the separator
# when CWL_PATH has no trailing one, so both "dir" and "dir/" work.
CWL_FILE = os.path.join(CWL_PATH, "texstudio/completion/tcolorbox.cwl")
def get_duplicate_relation(
    cwl_file: str,
) -> Tuple[Dict[str, str], List[str], Dict[str, List[str]]]:
    """Find ``#keyvals`` groups whose key lists are textually identical.

    Args:
        cwl_file: Full text of a ``.cwl`` completion file.

    Returns:
        A tuple ``(match_dict, dup_commands, dup_dict)`` where

        * ``match_dict`` maps each command (e.g. ``\\tcbset``) to the raw text
          between its ``#keyvals:<cmd>`` line and the next ``#endkeyvals``;
        * ``dup_commands`` lists every command whose key list duplicates that
          of an earlier command;
        * ``dup_dict`` maps every command to the list of later commands that
          share its key list (empty for unique or duplicate commands).
    """
    match_result: List[Tuple[str, str]] = re.findall(
        r"#keyvals:(\\[a-zA-Z]*)\n(.*?)#endkeyvals", cwl_file, flags=re.DOTALL
    )
    # If a command occurs more than once, the last key list wins while the
    # command keeps the position of its first occurrence.
    match_dict: Dict[str, str] = dict(match_result)

    dup_dict: Dict[str, List[str]] = {cmd: [] for cmd in match_dict}
    # Maps a key-list text to the first command that carries it.  A single
    # pass with this lookup is equivalent to comparing every pair of commands
    # but avoids the quadratic scan.
    first_owner: Dict[str, str] = {}
    for cmd, keyvals in match_dict.items():
        base = first_owner.setdefault(keyvals, cmd)
        if base != cmd:
            # The key list of cmd duplicates the one of base.
            dup_dict[base].append(cmd)

    # Flatten grouped by base command, in file order of the base commands.
    dup_commands: List[str] = [
        cmd for duplicates in dup_dict.values() for cmd in duplicates
    ]

    print(f"total number of keyvals group: {len(match_result)}")
    print(f"\tof which {len(dup_commands)} groups are removable\n")
    return match_dict, dup_commands, dup_dict
def remove_duplicate(
    cwl_file: str,
    match_dict: Dict[str, str],
    dup_dict: Dict[str, List[str]],
) -> str:
    """Merge duplicate ``#keyvals`` groups into the group of their base command.

    For every base command that has duplicates, the header line
    ``#keyvals:<base>`` becomes ``#keyvals:<base>,<dup1>,<dup2>,...`` and the
    whole section of each duplicate (its three-line ``# ---`` comment banner,
    the ``#keyvals:<dup>`` line and everything up to ``#endkeyvals``) is
    deleted.

    Args:
        cwl_file: Full text of the original ``.cwl`` file.
        match_dict: Maps each command to the raw text of its key list.
        dup_dict: Maps each command to the commands that duplicate it.

    Returns:
        The rewritten file text.

    Raises:
        ValueError: If the header of a base command is missing from
            ``cwl_file``, or if the section of a duplicate command is not
            found exactly once with the expected key list.
    """
    new_cwl = cwl_file
    prefix_count = kv_count = 0
    for cmd_base, cmds_dup in dup_dict.items():
        if not cmds_dup:
            continue

        # Replace
        #   #keyvals:cmd_base
        # with
        #   #keyvals:cmd_base,<comma separated cmds_dup>
        # Plain string replacement is used on purpose: the command names
        # contain backslashes, which would need escaping in both a regex
        # pattern and a regex replacement template.
        old_header = '#keyvals:' + cmd_base + '\n'
        new_header = '#keyvals:' + ','.join([cmd_base, *cmds_dup]) + '\n'
        if old_header not in cwl_file:
            raise ValueError(f"header line for {cmd_base} not found")
        print(f"Replace group {prefix_count}\n\t{old_header}with\n\t{new_header}")
        new_cwl = new_cwl.replace(old_header, new_header)
        prefix_count += 1

        # Replace
        #   #keyvals:<cmd_base> \xx \yy \zz #endkeyvals
        #   #keyvals:<cmd_dup>  \xx \yy \zz #endkeyvals
        # with
        #   #keyvals:<cmd_base>,<cmd_dup> \xx \yy \zz #endkeyvals
        for cmd_dup in cmds_dup:
            pattern_kv = re.compile(
                r"\n\n# -*\n# [^\n]*\n# -*\n#keyvals:"
                + re.escape(cmd_dup)
                + r"\n.*?#endkeyvals",
                flags=re.DOTALL,
            )
            matches = pattern_kv.findall(new_cwl)
            # Check the count first so that a missing section is reported
            # as a ValueError instead of an IndexError.
            if len(matches) != 1 or match_dict[cmd_dup] not in matches[0]:
                raise ValueError(
                    f"expected exactly one removable keyvals section for "
                    f"{cmd_dup}, found {len(matches)}"
                )
            print(f"\tRemove duplicate keyvals list {kv_count} cmd {cmd_dup}")
            new_cwl = pattern_kv.sub('', new_cwl)
            kv_count += 1
    return new_cwl
# Read the original completion file.  The encoding is given explicitly so the
# result does not depend on the platform's locale default.
# NOTE(review): assumes the .cwl file is UTF-8 encoded — confirm.
with open(CWL_FILE + "-old", encoding="utf-8") as file:
    original_cwl = file.read()

# Detect duplicated keyvals groups, then merge them into their base groups.
match_dict, dup_commands, dup_dict = get_duplicate_relation(original_cwl)
new_cwl = remove_duplicate(original_cwl, match_dict, dup_dict)

# Write the de-duplicated text next to the original.
with open(CWL_FILE + '-new', 'w', encoding="utf-8") as file:
    file.write(new_cwl)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
tcolorbox.cwl-old
tcolorbox.cwl-new