Skip to content

Instantly share code, notes, and snippets.

@wfng92
Created May 4, 2021 05:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wfng92/86003650fdc4648d06833b411060f474 to your computer and use it in GitHub Desktop.
Save wfng92/86003650fdc4648d06833b411060f474 to your computer and use it in GitHub Desktop.
"""Convert textcat annotation from JSONL to spaCy v3 .spacy format."""
import srsly
import typer
import warnings
from pathlib import Path
import spacy
from spacy.tokens import DocBin
def convert(lang: str, input_path: Path, output_path: Path) -> None:
    """Convert textcat annotations from a JSONL file to a binary .spacy file.

    Every record in the input must be a JSON object with a "text" entry (the
    raw document text) and a "cats" entry, which is stored unchanged as the
    document's categories. Texts are only tokenized, using a blank pipeline
    for the given language; all documents are then written together to the
    output file.

    Args:
        lang: Language code handed to ``spacy.blank``.
        input_path: JSONL file to read the annotations from.
        output_path: Destination file; missing parent directories are created.

    Raises:
        KeyError: If a record lacks the "text" or "cats" key.
    """
    nlp = spacy.blank(lang)
    doc_bin = DocBin()
    for record in srsly.read_jsonl(input_path):
        # make_doc runs the tokenizer only; no pipeline components are needed
        # to attach gold-standard categories.
        doc = nlp.make_doc(record["text"])
        doc.cats = record["cats"]
        doc_bin.add(doc)
    # DocBin.to_disk writes straight to the given path, so make sure the
    # target directory exists instead of failing after all the work is done.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc_bin.to_disk(output_path)
if __name__ == "__main__":
    # Run as a script: Typer builds the command-line interface from the
    # signature of `convert` (lang, input_path, output_path) and invokes it.
    typer.run(convert)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment