# wesslen/textcat_sent_sequence.py

## textcat_sent_sequence.py
import prodigy
import spacy
from prodigy.components.loaders import JSONL

@prodigy.recipe(
    "textcat_sent_sequence",
    dataset=("Dataset to save answers to", "positional", None, str),
    examples=("Examples to load from disk", "positional", None, str),
    model=("spaCy model to load", "positional", None, str),
    label=("Label for annotated data", "positional", None, str),
)
def textcat_topic(dataset, examples, model, label):
    """Build a recipe that presents each sentence of a paragraph for labelling.

    Every record read from ``examples`` must carry a ``"paragraph"`` key. The
    paragraph is split into sentences with the loaded spaCy pipeline and one
    task is emitted per sentence, showing the full paragraph with that
    sentence highlighted.

    Args:
        dataset: Name of the dataset the answers are saved to.
        examples: Path of the JSONL file to load.
        model: Name or path of the spaCy pipeline used for sentence splitting.
        label: Label attached to every emitted task.

    Returns:
        A dict of recipe components with the keys ``"before_db"``,
        ``"dataset"``, ``"stream"`` and ``"view_id"``.
    """
    # Local imports keep this block self-contained.
    from copy import deepcopy
    from html import escape

    nlp = spacy.load(model)

    # set up stream; may want get_stream() instead to hash/avoid dedup
    stream = JSONL(examples)

    def add_html(examples):
        """Yield one task per sentence, with that sentence highlighted."""
        for ex in examples:
            paragraph = ex["paragraph"]
            doc = nlp(paragraph)

            for sent in doc.sents:
                # A whitespace-only sentence offers nothing to annotate.
                if not sent.text.strip():
                    continue

                # Slice by character offsets rather than str.replace(), so
                # only this occurrence is highlighted even when the same
                # sentence text appears more than once in the paragraph.
                before = escape(paragraph[: sent.start_char], quote=False)
                inside = escape(sent.text, quote=False)
                after = escape(paragraph[sent.end_char :], quote=False)

                # Each task must be its own dict: re-yielding one mutated
                # dict would make every buffered task show the last sentence.
                task = deepcopy(ex)
                task["sentence"] = sent.text
                task["html"] = (
                    f"{before}<b style='background-color: yellow;'>"
                    f"{inside}</b>{after}"
                )
                task["label"] = label
                yield task

    def before_db(examples):
        """Drop the display-only ``"html"`` key before answers are stored."""
        for ex in examples:
            # pop() tolerates tasks that never had (or already lost) the key.
            ex.pop("html", None)
        return examples

    return {
        "before_db": before_db,
        "dataset": dataset,
        "stream": add_html(stream),
        "view_id": "classification",
    }
	import prodigy
	import spacy
	from prodigy.components.loaders import JSONL

	@prodigy.recipe(
	    "textcat_sent_sequence",
	    dataset=("Dataset to save answers to", "positional", None, str),
	    examples=("Examples to load from disk", "positional", None, str),
	    model=("spaCy model to load", "positional", None, str),
	    label=("Label for annotated data", "positional", None, str),
	)
	def textcat_topic(dataset, examples, model, label):
	    """Build a recipe that presents each sentence of a paragraph for labelling.

	    Every record read from ``examples`` must carry a ``"paragraph"`` key. The
	    paragraph is split into sentences with the loaded spaCy pipeline and one
	    task is emitted per sentence, showing the full paragraph with that
	    sentence highlighted.

	    Args:
	        dataset: Name of the dataset the answers are saved to.
	        examples: Path of the JSONL file to load.
	        model: Name or path of the spaCy pipeline used for sentence splitting.
	        label: Label attached to every emitted task.

	    Returns:
	        A dict of recipe components with the keys ``"before_db"``,
	        ``"dataset"``, ``"stream"`` and ``"view_id"``.
	    """
	    # Local imports keep this block self-contained.
	    from copy import deepcopy
	    from html import escape

	    nlp = spacy.load(model)

	    # set up stream; may want get_stream() instead to hash/avoid dedup
	    stream = JSONL(examples)

	    def add_html(examples):
	        """Yield one task per sentence, with that sentence highlighted."""
	        for ex in examples:
	            paragraph = ex["paragraph"]
	            doc = nlp(paragraph)

	            for sent in doc.sents:
	                # A whitespace-only sentence offers nothing to annotate.
	                if not sent.text.strip():
	                    continue

	                # Slice by character offsets rather than str.replace(), so
	                # only this occurrence is highlighted even when the same
	                # sentence text appears more than once in the paragraph.
	                before = escape(paragraph[: sent.start_char], quote=False)
	                inside = escape(sent.text, quote=False)
	                after = escape(paragraph[sent.end_char :], quote=False)

	                # Each task must be its own dict: re-yielding one mutated
	                # dict would make every buffered task show the last sentence.
	                task = deepcopy(ex)
	                task["sentence"] = sent.text
	                task["html"] = (
	                    f"{before}<b style='background-color: yellow;'>"
	                    f"{inside}</b>{after}"
	                )
	                task["label"] = label
	                yield task

	    def before_db(examples):
	        """Drop the display-only ``"html"`` key before answers are stored."""
	        for ex in examples:
	            # pop() tolerates tasks that never had (or already lost) the key.
	            ex.pop("html", None)
	        return examples

	    return {
	        "before_db": before_db,
	        "dataset": dataset,
	        "stream": add_html(stream),
	        "view_id": "classification",
	    }