# mikewcasale/narcissus.py

## narcissus.py
# Drop any existing "narcissus" entry first so that the registration below
# starts from a clean slate when this script is executed more than once.
t5.data.TaskRegistry.remove('narcissus')
t5.data.TaskRegistry.add(
    "narcissus",
    # Supply a function which returns a tf.data.Dataset.
    dataset_fn=ds_func,
    splits=["train", "valid"],
    # Supply the functions which preprocess text from the tf.data.Dataset.
    # NOTE(review): prefix_lm is called here with only the sample and a
    # `label` keyword -- confirm this matches the installed t5 signature.
    text_preprocessor=[
        lambda sample: t5.data.preprocessors.prefix_lm(sample, label='article: '),
    ],
    # Use the same vocabulary that we used for pre-training.
    sentencepiece_model_path=t5.data.DEFAULT_SPM_PATH,
    # Lowercase targets before computing metrics.
    postprocess_fn=t5.data.postprocessors.lower_text,
    # We'll use accuracy as our evaluation metric.
    metric_fns=[t5.evaluation.metrics.accuracy],
    # Not required, but helps for mixing and auto-caching.
    # NOTE(review): the name `num_nq_examples` is defined outside this
    # snippet -- verify it holds the example counts for this dataset.
    num_input_examples=num_nq_examples,
)