Skip to content

Instantly share code, notes, and snippets.

@tgamauf
Last active October 28, 2019 17:51
Show Gist options
  • Save tgamauf/5eb04f59becc045c88cba29fcd168d24 to your computer and use it in GitHub Desktop.
Save tgamauf/5eb04f59becc045c88cba29fcd168d24 to your computer and use it in GitHub Desktop.
Write movie ratings to TFRecord file, then read it back
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Suggestion': 'Inception', 'Purchase Price': 9.99, 'Age': 29, 'Suggestion Purchased': 1.0, 'Movie': ['The Shawshank Redemption', 'Fight Club'], 'Movie Ratings': [9.0, 9.7]}\n"
]
}
],
"source": [
"# Create example data\n",
"data = {\n",
" 'Age': 29,\n",
" 'Movie': ['The Shawshank Redemption', 'Fight Club'],\n",
" 'Movie Ratings': [9.0, 9.7],\n",
" 'Suggestion': 'Inception',\n",
" 'Suggestion Purchased': 1.0,\n",
" 'Purchase Price': 9.99\n",
"}\n",
"\n",
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"features {\n",
" feature {\n",
" key: \"Age\"\n",
" value {\n",
" int64_list {\n",
" value: 29\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Movie\"\n",
" value {\n",
" bytes_list {\n",
" value: \"The Shawshank Redemption\"\n",
" value: \"Fight Club\"\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Movie Ratings\"\n",
" value {\n",
" float_list {\n",
" value: 9.0\n",
" value: 9.699999809265137\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Purchase Price\"\n",
" value {\n",
" float_list {\n",
" value: 9.989999771118164\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Suggestion\"\n",
" value {\n",
" bytes_list {\n",
" value: \"Inception\"\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Suggestion Purchased\"\n",
" value {\n",
" float_list {\n",
" value: 1.0\n",
" }\n",
" }\n",
" }\n",
"}\n",
"\n"
]
}
],
"source": [
"# Create the Example\n",
"example = tf.train.Example(features=tf.train.Features(feature={\n",
" 'Age': tf.train.Feature(\n",
" int64_list=tf.train.Int64List(value=[data['Age']])),\n",
" 'Movie': tf.train.Feature(\n",
" bytes_list=tf.train.BytesList(\n",
" value=[m.encode('utf-8') for m in data['Movie']])),\n",
" 'Movie Ratings': tf.train.Feature(\n",
" float_list=tf.train.FloatList(value=data['Movie Ratings'])),\n",
" 'Suggestion': tf.train.Feature(\n",
" bytes_list=tf.train.BytesList(\n",
" value=[data['Suggestion'].encode('utf-8')])),\n",
" 'Suggestion Purchased': tf.train.Feature(\n",
" float_list=tf.train.FloatList(\n",
" value=[data['Suggestion Purchased']])),\n",
" 'Purchase Price': tf.train.Feature(\n",
" float_list=tf.train.FloatList(value=[data['Purchase Price']]))\n",
"}))\n",
"\n",
"print(example)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Write TFrecord file\n",
"with tf.python_io.TFRecordWriter('customer_1.tfrecord') as writer:\n",
" writer.write(example.SerializeToString())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Suggestion: b'Inception'\n",
"Purchase Price: 9.989999771118164\n",
"Age: 29\n",
"Suggestion Purchased: 1.0\n",
"Movie: SparseTensorValue(indices=array([[0],\n",
" [1]]), values=array([b'The Shawshank Redemption', b'Fight Club'], dtype=object), dense_shape=array([2]))\n",
"Movie Ratings: SparseTensorValue(indices=array([[0],\n",
" [1]]), values=array([9. , 9.7], dtype=float32), dense_shape=array([2]))\n"
]
}
],
"source": [
"# Read and print data:\n",
"sess = tf.InteractiveSession()\n",
"\n",
"# Read TFRecord file\n",
"reader = tf.TFRecordReader()\n",
"filename_queue = tf.train.string_input_producer(['customer_1.tfrecord'])\n",
"\n",
"_, serialized_example = reader.read(filename_queue)\n",
"\n",
"# Define features\n",
"read_features = {\n",
" 'Age': tf.FixedLenFeature([], dtype=tf.int64),\n",
" 'Movie': tf.VarLenFeature(dtype=tf.string),\n",
" 'Movie Ratings': tf.VarLenFeature(dtype=tf.float32),\n",
" 'Suggestion': tf.FixedLenFeature([], dtype=tf.string),\n",
" 'Suggestion Purchased': tf.FixedLenFeature([], dtype=tf.float32),\n",
" 'Purchase Price': tf.FixedLenFeature([], dtype=tf.float32)}\n",
"\n",
"# Extract features from serialized data\n",
"read_data = tf.parse_single_example(serialized=serialized_example,\n",
" features=read_features)\n",
"\n",
"# Many tf.train functions use tf.train.QueueRunner,\n",
"# so we need to start it before we read\n",
"tf.train.start_queue_runners(sess)\n",
"\n",
"# Print features\n",
"for name, tensor in read_data.items():\n",
" print('{}: {}'.format(name, tensor.eval()))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment