Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Converting an OCR dataset for TensorFlow into a single TFRecords file
import tensorflow as tf
def _bytes_feature(value):
    """Wrap a single byte string in a ``tf.train.Feature``.

    Args:
        value: The payload to store. Raw ``bytes`` are stored as-is; text
            strings are encoded to UTF-8 first.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` holds exactly one element.
    """
    # ``BytesList`` only accepts bytes. On Python 3 a label read from a text
    # file is ``str``, so encode it; ``type(u'')`` is the text type on both
    # Python 2 and 3, which leaves raw byte payloads (e.g. image data) alone.
    if isinstance(value, type(u'')):
        value = value.encode('utf-8')
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
# Build one TFRecords file ('dataset.tfrecords') from the image/answer pairs
# listed in 'training-data.txt', one whitespace-separated
# "<image path> <answer>" pair per line.
with open('training-data.txt', 'r') as f:
    # Skip blank lines (e.g. a trailing newline at the end of the file) so
    # they do not break the two-value unpacking in the loop below.
    pairs = [line.split() for line in f if line.strip()]

# Using the writer as a context manager closes the output file even when an
# image cannot be read or a line does not contain exactly two fields.
with tf.python_io.TFRecordWriter('dataset.tfrecords') as writer:
    for img_path, answer in pairs:
        # Store the raw, still-encoded image file contents.
        with open(img_path, 'rb') as img_file:
            img = img_file.read()
        # BytesList requires bytes; on Python 3 the answer read from the
        # text file is str, so encode it before building the feature.
        if not isinstance(answer, bytes):
            answer = answer.encode('utf-8')
        example = tf.train.Example(features=tf.train.Features(feature={
            'image': _bytes_feature(img),
            'answer': _bytes_feature(answer)}))
        writer.write(example.SerializeToString())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment