-
-
Save mart-r/d853073f1c724aa5345769faea4a869e to your computer and use it in GitHub Desktop.
Update the spaCy model name in older MedCAT models that specified it as `spacy_model`, using the default (`en_core_web_md`) instead. The script can either overwrite the existing CDB (1 CLI argument) or write the result to a new file (2 CLI arguments).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional | |
from medcat.cdb import CDB | |
from sys import argv | |
def main(cdb_file: str, new_file_path: Optional[str] = None, new_spacy_model: str = 'en_core_web_md'):
    """Rewrite the spacy model name stored in a CDB's config and save it.

    Args:
        cdb_file: Path of the CDB to load.
        new_file_path: Where to save the updated CDB. When ``None``, the
            CDB is saved back to ``cdb_file``, replacing the original.
        new_spacy_model: Value written to ``config.general.spacy_model``.
    """
    target_path = new_file_path
    if target_path is None:
        # No separate destination given: save over the input file.
        target_path = cdb_file
        print("Overwriting existing CDB")

    cdb = CDB.load(cdb_file)
    general_cfg = cdb.config.general
    print("Updating spacy model from", general_cfg.spacy_model, "to", new_spacy_model)
    general_cfg.spacy_model = new_spacy_model

    cdb.save(target_path)
    print("Saved CDB to", target_path)
if __name__ == "__main__":
    # 1 argument  -> overwrite the CDB in place
    # 2 arguments -> save to second file
    # 3 arguments -> additionally override the spacy model name
    cli_args = argv[1:]
    if not 1 <= len(cli_args) <= 3:
        # Without this check, a wrong argument count surfaces as a raw
        # TypeError traceback from the call to main().
        raise SystemExit(
            "Usage: " + argv[0] + " CDB_FILE [NEW_FILE_PATH [NEW_SPACY_MODEL]]"
        )
    main(*cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment