Skip to content

Instantly share code, notes, and snippets.

@chauhanakash23
Created June 25, 2020 07:00
Show Gist options
  • Save chauhanakash23/caba470b769214aaeadf425ec476d803 to your computer and use it in GitHub Desktop.
Save chauhanakash23/caba470b769214aaeadf425ec476d803 to your computer and use it in GitHub Desktop.
Script to convert summarization data (input texts and their target summaries) into a TFRecord file.
import pandas as pd
import tensorflow as tf
# Destination TFRecord file. Replace the "<Your path>" placeholder with a real
# directory prefix before running.
save_path = "<Your path>/pegasus/data/testdata/test_pattern_1.tfrecords"

# Parallel lists: targets[i] is the target text for inputs[i].
input_dict = dict(
    inputs=[
        # Your text inputs to be summarized.
    ],
    targets=[
        # Corresponding targets for the inputs.
    ],
)

# Building the DataFrame fails fast (pandas raises ValueError) when the two
# lists differ in length, instead of silently dropping unmatched entries.
data = pd.DataFrame(input_dict)


def _bytes_feature(text):
    """Return a tf.train.Feature holding *text* as one UTF-8 encoded bytes value."""
    return tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[text.encode("utf-8")])
    )


with tf.io.TFRecordWriter(save_path) as writer:
    # Select the columns by name rather than by position so the pairing does
    # not depend on column order or on how many columns the frame has.
    pairs = data[["inputs", "targets"]].itertuples(index=False, name=None)
    for source_text, target_text in pairs:
        # One serialized tf.train.Example per (input, target) pair, with both
        # texts stored as bytes features under the "inputs"/"targets" keys.
        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    "inputs": _bytes_feature(source_text),
                    "targets": _bytes_feature(target_text),
                }
            )
        )
        writer.write(example.SerializeToString())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment