Skip to content

Instantly share code, notes, and snippets.

@muzimuzhi
Last active January 9, 2023 08:31
Show Gist options
  • Save muzimuzhi/70e31826e4efe351ddd03fe1edff3410 to your computer and use it in GitHub Desktop.
Save muzimuzhi/70e31826e4efe351ddd03fe1edff3410 to your computer and use it in GitHub Desktop.
Remove duplicates in the TeXstudio completion file `tcolorbox.cwl`
#!/usr/bin/env python3
import itertools
from typing import Dict, List, Tuple
import re
# Directory prefix in front of the ``texstudio`` folder. Replace the
# placeholder before running. It is concatenated as-is on the next line, so
# it must end with a path separator.
CWL_PATH = "your path"
# Base path of the completion file. The script reads ``CWL_FILE + "-old"``
# and writes ``CWL_FILE + "-new"``.
CWL_FILE = CWL_PATH + "texstudio/completion/tcolorbox.cwl"
def get_duplicate_relation(cwl_file):
    r"""Find ``#keyvals`` groups whose bodies are exact duplicates.

    Only groups headed by a single command are recognised, i.e. text of the
    form ``#keyvals:\cmd`` + newline + body + ``#endkeyvals`` where ``\cmd``
    is a backslash followed by ASCII letters.

    Args:
        cwl_file: The full *text* of the cwl file (not a path).

    Returns:
        A tuple ``(match_dict, dup_commands, dup_dict)``:

        * ``match_dict`` maps each command to the body of its keyvals group
          (if a command heads several groups, the last body wins).
        * ``dup_commands`` lists every command whose body duplicates that of
          an earlier command.
        * ``dup_dict`` maps every command to the list of later commands that
          share its body; the list is empty for commands that are themselves
          duplicates or that have no duplicates.
    """
    match_result: List[Tuple[str, str]] = re.findall(
        r"""#keyvals:(\\[a-zA-Z]*)\n(.*?)#endkeyvals""",
        cwl_file,
        flags=re.DOTALL,
    )
    # Dict[cmd: str, keyvals: str]
    match_dict: Dict[str, str] = dict(match_result)

    # Dict(cmdA: <list of cmds duplicate with cmdA>)
    dup_dict: Dict[str, List[str]] = {cmd: [] for cmd in match_dict}

    # Group by body in one pass instead of comparing every pair of commands:
    # the first command seen with a given body becomes the base, and every
    # later command with the same body is recorded as its duplicate.
    base_of_body: Dict[str, str] = {}
    for cmd, keyvals in match_dict.items():
        base = base_of_body.setdefault(keyvals, cmd)
        if base != cmd:
            # keyvals of cmd is duplicate to base
            dup_dict[base].append(cmd)

    # Flatten base by base so the order matches a pairwise scan: all
    # duplicates of the first base, then those of the second, and so on.
    dup_commands: List[str] = list(
        itertools.chain.from_iterable(dup_dict.values())
    )
    dup_cmds_count = len(dup_commands)

    print(f"total number of keyvals group: {len(match_result)}")
    print(f"\tof which {dup_cmds_count} groups are removable\n")
    return match_dict, dup_commands, dup_dict
def remove_duplicate(cwl_file, match_dict, dup_dict):
    r"""Merge duplicate ``#keyvals`` groups into the group of their base command.

    For every base command that has duplicates, its header line
    ``#keyvals:\base`` is rewritten to ``#keyvals:\base,\dup1,\dup2,...`` and
    the whole group of each duplicate command is deleted, together with the
    three-line ``# ---`` / ``# title`` / ``# ---`` comment banner and the two
    newlines that precede it.

    Args:
        cwl_file: The full text of the original cwl file.
        match_dict: Mapping of command to keyvals body, used to sanity-check
            that the group about to be removed really holds that body.
        dup_dict: Mapping of base command to the list of commands whose
            keyvals duplicate it (empty lists are skipped).

    Returns:
        The rewritten cwl text.

    Raises:
        AssertionError: If a base header is missing from ``cwl_file``, or a
            duplicate's group is not found exactly once in the expected
            banner format, or does not contain the expected body.
    """
    new_cwl = cwl_file
    prefix_count = kv_count = 0
    for cmd_base, cmds_dup in dup_dict.items():
        if not cmds_dup:
            continue

        # replace
        #   #keyvals:cmd_base
        # with
        #   #keyvals:cmd_base,<comma separated cmds_dup>
        cmds = ','.join([cmd_base, *cmds_dup])
        old_header = '#keyvals:' + cmd_base + '\n'
        new_header = '#keyvals:' + cmds + '\n'
        assert old_header in cwl_file, f"header for {cmd_base} not found"
        print(f"Replace group {prefix_count}\n\t{old_header}with\n\t{new_header}")
        # Plain string replacement: command names start with a backslash, so
        # pushing them through regex escaping (let alone escaping them twice)
        # produces a pattern that looks for doubled backslashes and never
        # matches the header in the file.
        new_cwl = new_cwl.replace(old_header, new_header)
        prefix_count += 1

        # replace
        #   #keyvals:<cmd_base> \xx \yy \zz #endkeyvals
        #   #keyvals:<cmd_dup> \xx \yy \zz #endkeyvals
        # with
        #   #keyvals:<cmd_base>,<cmd_dup> \xx \yy \zz #endkeyvals
        for cmd_dup in cmds_dup:
            pattern_kv = (
                r"""\n\n# -*\n# [^\n]*\n# -*\n#keyvals:"""
                + re.escape(cmd_dup)
                + r"""\n.*?#endkeyvals"""
            )
            match = re.findall(pattern_kv, new_cwl, flags=re.DOTALL)
            # Check the count first so a missing group fails the assertion
            # instead of raising IndexError on match[0].
            assert len(match) == 1, (
                f"expected exactly one keyvals group for {cmd_dup}, "
                f"found {len(match)}"
            )
            assert match_dict[cmd_dup] in match[0], (
                f"keyvals group for {cmd_dup} does not contain the expected body"
            )
            print(f"\tRemove duplicate keyvals list {kv_count} cmd {cmd_dup}")
            new_cwl = re.sub(pattern_kv, '', new_cwl, flags=re.DOTALL)
            kv_count += 1
    return new_cwl
# Read "<CWL_FILE>-old", merge duplicate keyvals groups, and write the result
# to "<CWL_FILE>-new". The encoding is pinned so the run does not depend on
# the platform's locale default.
# NOTE(review): assumes the cwl file is UTF-8 encoded; confirm for your copy.
with open(CWL_FILE + "-old", encoding="utf-8") as file:
    original_cwl = file.read()
match_dict, dup_commands, dup_dict = get_duplicate_relation(original_cwl)
new_cwl = remove_duplicate(original_cwl, match_dict, dup_dict)
with open(CWL_FILE + '-new', 'w', encoding="utf-8") as file:
    file.write(new_cwl)
@muzimuzhi
Copy link
Author

  • input is tcolorbox.cwl-old
  • output is tcolorbox.cwl-new

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment