Skip to content

Instantly share code, notes, and snippets.

@magdaaniol
magdaaniol / add_pos.py
Last active May 1, 2024 15:55
A script to preprocess a Prodigy JSONL stream by adding POS spans from a spaCy pipeline
import copy
import json
from typing import Dict, List, Optional, Tuple
import spacy
import srsly
from spacy.language import Language
from spacy.tokens import Doc, Span
from spacy.util import filter_spans
from wasabi import msg
@magdaaniol
magdaaniol / merge_snippets.py
Created March 5, 2024 18:13
Utilities to split long Prodigy-annotated documents into snippets and merge them back
from pathlib import Path
import srsly
import spacy
import typer
from util import Snippet, Document
from collections import defaultdict
def main(snippets_path: Path, articles_path: Path) -> None:
"""