Skip to content

Instantly share code, notes, and snippets.

@tgamauf
Last active February 11, 2021 06:54
Show Gist options
  • Save tgamauf/ab5efb632bca8594fb39940d0641dc84 to your computer and use it in GitHub Desktop.
Save tgamauf/ab5efb632bca8594fb39940d0641dc84 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'Favorites': ['Majesty Rose', 'Savannah Outen', 'One Direction'], 'Locale': 'pt_BR', 'Age': 19, 'Data': [{'Movie Name': 'The Shawshank Redemption', 'Movie Rating': 9.0, 'Actors': ['Tim Robbins', 'Morgan Freeman']}, {'Movie Name': 'Fight Club', 'Movie Rating': 9.7, 'Actors': ['Brad Pitt', 'Edward Norton', 'Helena Bonham Carter']}]}\n"
]
}
],
"source": [
"# Build the example record in two steps: context values first, movie list second\n",
"data = {\n",
"    # Context\n",
"    'Locale': 'pt_BR',\n",
"    'Age': 19,\n",
"    'Favorites': ['Majesty Rose', 'Savannah Outen', 'One Direction'],\n",
"}\n",
"\n",
"# Data: one dict per movie\n",
"data['Data'] = [\n",
"    {   # Movie 1\n",
"        'Movie Name': 'The Shawshank Redemption',\n",
"        'Movie Rating': 9.0,\n",
"        'Actors': ['Tim Robbins', 'Morgan Freeman'],\n",
"    },\n",
"    {   # Movie 2\n",
"        'Movie Name': 'Fight Club',\n",
"        'Movie Rating': 9.7,\n",
"        'Actors': ['Brad Pitt', 'Edward Norton', 'Helena Bonham Carter'],\n",
"    },\n",
"]\n",
"\n",
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"context {\n",
" feature {\n",
" key: \"Age\"\n",
" value {\n",
" int64_list {\n",
" value: 19\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Favorites\"\n",
" value {\n",
" bytes_list {\n",
" value: \"Majesty Rose\"\n",
" value: \"Savannah Outen\"\n",
" value: \"One Direction\"\n",
" }\n",
" }\n",
" }\n",
" feature {\n",
" key: \"Locale\"\n",
" value {\n",
" bytes_list {\n",
" value: \"pt_BR\"\n",
" }\n",
" }\n",
" }\n",
"}\n",
"feature_lists {\n",
" feature_list {\n",
" key: \"Movie Actors\"\n",
" value {\n",
" feature {\n",
" bytes_list {\n",
" value: \"Tim Robbins\"\n",
" value: \"Morgan Freeman\"\n",
" }\n",
" }\n",
" feature {\n",
" bytes_list {\n",
" value: \"Brad Pitt\"\n",
" value: \"Edward Norton\"\n",
" value: \"Helena Bonham Carter\"\n",
" }\n",
" }\n",
" }\n",
" }\n",
" feature_list {\n",
" key: \"Movie Names\"\n",
" value {\n",
" feature {\n",
" bytes_list {\n",
" value: \"The Shawshank Redemption\"\n",
" }\n",
" }\n",
" feature {\n",
" bytes_list {\n",
" value: \"Fight Club\"\n",
" }\n",
" }\n",
" }\n",
" }\n",
" feature_list {\n",
" key: \"Movie Ratings\"\n",
" value {\n",
" feature {\n",
" float_list {\n",
" value: 9.0\n",
" }\n",
" }\n",
" feature {\n",
" float_list {\n",
" value: 9.699999809265137\n",
" }\n",
" }\n",
" }\n",
" }\n",
"}\n",
"\n"
]
}
],
"source": [
"# Context features: every value is wrapped in the tf.train list type that\n",
"# matches how it is stored (strings as UTF-8 bytes, the age as int64)\n",
"context_feature_map = {\n",
"    'Locale': tf.train.Feature(\n",
"        bytes_list=tf.train.BytesList(\n",
"            value=[data['Locale'].encode('utf-8')])),\n",
"    'Age': tf.train.Feature(\n",
"        int64_list=tf.train.Int64List(value=[data['Age']])),\n",
"    'Favorites': tf.train.Feature(\n",
"        bytes_list=tf.train.BytesList(\n",
"            value=[favorite.encode('utf-8')\n",
"                   for favorite in data['Favorites']])),\n",
"}\n",
"customer = tf.train.Features(feature=context_feature_map)\n",
"\n",
"# Sequence data: for each field build one tf.train.Feature per movie and\n",
"# collect them, in movie order, into that field's FeatureList\n",
"movie_names = tf.train.FeatureList(feature=[\n",
"    tf.train.Feature(bytes_list=tf.train.BytesList(\n",
"        value=[movie['Movie Name'].encode('utf-8')]))\n",
"    for movie in data['Data']\n",
"])\n",
"movie_ratings = tf.train.FeatureList(feature=[\n",
"    tf.train.Feature(float_list=tf.train.FloatList(\n",
"        value=[movie['Movie Rating']]))\n",
"    for movie in data['Data']\n",
"])\n",
"movie_actors = tf.train.FeatureList(feature=[\n",
"    tf.train.Feature(bytes_list=tf.train.BytesList(\n",
"        value=[actor.encode('utf-8') for actor in movie['Actors']]))\n",
"    for movie in data['Data']\n",
"])\n",
"\n",
"movies = tf.train.FeatureLists(feature_list={\n",
"    'Movie Names': movie_names,\n",
"    'Movie Ratings': movie_ratings,\n",
"    'Movie Actors': movie_actors,\n",
"})\n",
"\n",
"# Combine context and feature lists into the SequenceExample\n",
"example = tf.train.SequenceExample(context=customer, feature_lists=movies)\n",
"\n",
"print(example)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Serialize the SequenceExample, then write it as one record of the TFRecord file\n",
"record_bytes = example.SerializeToString()\n",
"with tf.python_io.TFRecordWriter('customer_1.tfrecord') as writer:\n",
"    writer.write(record_bytes)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Context:\n",
"Favorites: SparseTensorValue(indices=array([[0],\n",
" [1],\n",
" [2]]), values=array([b'Majesty Rose', b'Savannah Outen', b'One Direction'], dtype=object), dense_shape=array([3]))\n",
"Locale: b'pt_BR'\n",
"Age: 19\n",
"\n",
"Data\n",
"Movie Names: [b'The Shawshank Redemption' b'Fight Club']\n",
"Movie Actors: SparseTensorValue(indices=array([[0, 0],\n",
" [0, 1],\n",
" [1, 0],\n",
" [1, 1],\n",
" [1, 2]]), values=array([b'Tim Robbins', b'Morgan Freeman', b'Brad Pitt', b'Edward Norton',\n",
" b'Helena Bonham Carter'], dtype=object), dense_shape=array([2, 3]))\n",
"Movie Ratings: [9. 9.7]\n"
]
}
],
"source": [
"# Read the TFRecord file back and print every parsed feature\n",
"sess = tf.InteractiveSession()\n",
"\n",
"# Read TFRecord file\n",
"reader = tf.TFRecordReader()\n",
"filename_queue = tf.train.string_input_producer(['customer_1.tfrecord'])\n",
"\n",
"_, serialized_example = reader.read(filename_queue)\n",
"\n",
"# Define features\n",
"context_features = {\n",
"    'Locale': tf.FixedLenFeature([], dtype=tf.string),\n",
"    'Age': tf.FixedLenFeature([], dtype=tf.int64),\n",
"    'Favorites': tf.VarLenFeature(dtype=tf.string)\n",
"}\n",
"sequence_features = {\n",
"    'Movie Names': tf.FixedLenSequenceFeature([], dtype=tf.string),\n",
"    'Movie Ratings': tf.FixedLenSequenceFeature([], dtype=tf.float32),\n",
"    'Movie Actors': tf.VarLenFeature(dtype=tf.string)\n",
"}\n",
"\n",
"# Extract features from serialized data\n",
"context_data, sequence_data = tf.parse_single_sequence_example(\n",
"    serialized=serialized_example,\n",
"    context_features=context_features,\n",
"    sequence_features=sequence_features)\n",
"\n",
"# string_input_producer feeds the reader through a tf.train.QueueRunner,\n",
"# so its threads must be started before the first read. The coordinator\n",
"# lets us stop and join those threads again when we are done.\n",
"coord = tf.train.Coordinator()\n",
"threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n",
"\n",
"try:\n",
"    # Fetch everything in ONE run call. Each separate run/eval re-executes\n",
"    # reader.read() and dequeues another record, so evaluating the tensors\n",
"    # one by one would mix features of different examples as soon as the\n",
"    # file holds more than one record.\n",
"    context_values, sequence_values = sess.run([context_data, sequence_data])\n",
"\n",
"    # Print features\n",
"    print('Context:')\n",
"    for name, value in context_values.items():\n",
"        print('{}: {}'.format(name, value))\n",
"\n",
"    print('\\nData')\n",
"    for name, value in sequence_values.items():\n",
"        print('{}: {}'.format(name, value))\n",
"finally:\n",
"    # Shut down the queue runner threads and release the session\n",
"    coord.request_stop()\n",
"    coord.join(threads)\n",
"    sess.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment