Last active
February 11, 2021 06:54
-
-
Save tgamauf/ab5efb632bca8594fb39940d0641dc84 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import tensorflow as tf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'Favorites': ['Majesty Rose', 'Savannah Outen', 'One Direction'], 'Locale': 'pt_BR', 'Age': 19, 'Data': [{'Movie Name': 'The Shawshank Redemption', 'Movie Rating': 9.0, 'Actors': ['Tim Robbins', 'Morgan Freeman']}, {'Movie Name': 'Fight Club', 'Movie Rating': 9.7, 'Actors': ['Brad Pitt', 'Edward Norton', 'Helena Bonham Carter']}]}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Example record: customer-level context plus a list of rated movies.\n", | |
"# Each movie is built as its own dict first so the final structure stays flat.\n", | |
"shawshank = {\n", | |
"    'Movie Name': 'The Shawshank Redemption',\n", | |
"    'Movie Rating': 9.0,\n", | |
"    'Actors': ['Tim Robbins', 'Morgan Freeman']\n", | |
"}\n", | |
"fight_club = {\n", | |
"    'Movie Name': 'Fight Club',\n", | |
"    'Movie Rating': 9.7,\n", | |
"    'Actors': ['Brad Pitt', 'Edward Norton', 'Helena Bonham Carter']\n", | |
"}\n", | |
"\n", | |
"data = {\n", | |
"    # Context: values that describe the customer as a whole\n", | |
"    'Locale': 'pt_BR',\n", | |
"    'Age': 19,\n", | |
"    'Favorites': ['Majesty Rose', 'Savannah Outen', 'One Direction'],\n", | |
"    # Data: the sequence of movies, in order\n", | |
"    'Data': [shawshank, fight_club]\n", | |
"}\n", | |
"\n", | |
"print(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"context {\n", | |
" feature {\n", | |
" key: \"Age\"\n", | |
" value {\n", | |
" int64_list {\n", | |
" value: 19\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" feature {\n", | |
" key: \"Favorites\"\n", | |
" value {\n", | |
" bytes_list {\n", | |
" value: \"Majesty Rose\"\n", | |
" value: \"Savannah Outen\"\n", | |
" value: \"One Direction\"\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" feature {\n", | |
" key: \"Locale\"\n", | |
" value {\n", | |
" bytes_list {\n", | |
" value: \"pt_BR\"\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
"}\n", | |
"feature_lists {\n", | |
" feature_list {\n", | |
" key: \"Movie Actors\"\n", | |
" value {\n", | |
" feature {\n", | |
" bytes_list {\n", | |
" value: \"Tim Robbins\"\n", | |
" value: \"Morgan Freeman\"\n", | |
" }\n", | |
" }\n", | |
" feature {\n", | |
" bytes_list {\n", | |
" value: \"Brad Pitt\"\n", | |
" value: \"Edward Norton\"\n", | |
" value: \"Helena Bonham Carter\"\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" feature_list {\n", | |
" key: \"Movie Names\"\n", | |
" value {\n", | |
" feature {\n", | |
" bytes_list {\n", | |
" value: \"The Shawshank Redemption\"\n", | |
" }\n", | |
" }\n", | |
" feature {\n", | |
" bytes_list {\n", | |
" value: \"Fight Club\"\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" feature_list {\n", | |
" key: \"Movie Ratings\"\n", | |
" value {\n", | |
" feature {\n", | |
" float_list {\n", | |
" value: 9.0\n", | |
" }\n", | |
" }\n", | |
" feature {\n", | |
" float_list {\n", | |
" value: 9.699999809265137\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
"}\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Small constructors for the three kinds of tf.train.Feature used in this cell\n", | |
"def _bytes_feature(texts):\n", | |
"    \"\"\"Return a bytes_list Feature holding the UTF-8 encoding of each str in texts.\"\"\"\n", | |
"    encoded = [text.encode('utf-8') for text in texts]\n", | |
"    return tf.train.Feature(bytes_list=tf.train.BytesList(value=encoded))\n", | |
"\n", | |
"\n", | |
"def _int64_feature(numbers):\n", | |
"    \"\"\"Return an int64_list Feature holding the given list of ints.\"\"\"\n", | |
"    return tf.train.Feature(int64_list=tf.train.Int64List(value=numbers))\n", | |
"\n", | |
"\n", | |
"def _float_feature(numbers):\n", | |
"    \"\"\"Return a float_list Feature holding the given list of floats.\"\"\"\n", | |
"    return tf.train.Feature(float_list=tf.train.FloatList(value=numbers))\n", | |
"\n", | |
"\n", | |
"# Context features: one value (or one list of values) per customer\n", | |
"customer = tf.train.Features(feature={\n", | |
"    'Locale': _bytes_feature([data['Locale']]),\n", | |
"    'Age': _int64_feature([data['Age']]),\n", | |
"    'Favorites': _bytes_feature(data['Favorites'])\n", | |
"})\n", | |
"\n", | |
"# Sequence data: each list gets exactly one Feature per movie, in movie order\n", | |
"names_features = [\n", | |
"    _bytes_feature([entry['Movie Name']]) for entry in data['Data']]\n", | |
"ratings_features = [\n", | |
"    _float_feature([entry['Movie Rating']]) for entry in data['Data']]\n", | |
"actors_features = [\n", | |
"    _bytes_feature(entry['Actors']) for entry in data['Data']]\n", | |
"\n", | |
"movie_names = tf.train.FeatureList(feature=names_features)\n", | |
"movie_ratings = tf.train.FeatureList(feature=ratings_features)\n", | |
"movie_actors = tf.train.FeatureList(feature=actors_features)\n", | |
"\n", | |
"# Group the per-movie lists under the keys used when parsing the record\n", | |
"movies = tf.train.FeatureLists(feature_list={\n", | |
"    'Movie Names': movie_names,\n", | |
"    'Movie Ratings': movie_ratings,\n", | |
"    'Movie Actors': movie_actors\n", | |
"})\n", | |
"\n", | |
"# Combine context and sequence parts into the SequenceExample\n", | |
"example = tf.train.SequenceExample(context=customer,\n", | |
"                                   feature_lists=movies)\n", | |
"\n", | |
"print(example)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Serialize the SequenceExample and store it as a record in a TFRecord file\n", | |
"with tf.python_io.TFRecordWriter('customer_1.tfrecord') as record_writer:\n", | |
"    serialized = example.SerializeToString()\n", | |
"    record_writer.write(serialized)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Context:\n", | |
"Favorites: SparseTensorValue(indices=array([[0],\n", | |
" [1],\n", | |
" [2]]), values=array([b'Majesty Rose', b'Savannah Outen', b'One Direction'], dtype=object), dense_shape=array([3]))\n", | |
"Locale: b'pt_BR'\n", | |
"Age: 19\n", | |
"\n", | |
"Data\n", | |
"Movie Names: [b'The Shawshank Redemption' b'Fight Club']\n", | |
"Movie Actors: SparseTensorValue(indices=array([[0, 0],\n", | |
" [0, 1],\n", | |
" [1, 0],\n", | |
" [1, 1],\n", | |
" [1, 2]]), values=array([b'Tim Robbins', b'Morgan Freeman', b'Brad Pitt', b'Edward Norton',\n", | |
" b'Helena Bonham Carter'], dtype=object), dense_shape=array([2, 3]))\n", | |
"Movie Ratings: [9. 9.7]\n" | |
] | |
} | |
], | |
"source": [ | |
"# Read the TFRecord file back and print the parsed features.\n", | |
"# The session is left open after this cell for further interactive use;\n", | |
"# call sess.close() when it is no longer needed.\n", | |
"sess = tf.InteractiveSession()\n", | |
"\n", | |
"# Read TFRecord file\n", | |
"reader = tf.TFRecordReader()\n", | |
"filename_queue = tf.train.string_input_producer(['customer_1.tfrecord'])\n", | |
"\n", | |
"_, serialized_example = reader.read(filename_queue)\n", | |
"\n", | |
"# Define features\n", | |
"context_features = {\n", | |
"    'Locale': tf.FixedLenFeature([], dtype=tf.string),\n", | |
"    'Age': tf.FixedLenFeature([], dtype=tf.int64),\n", | |
"    'Favorites': tf.VarLenFeature(dtype=tf.string)\n", | |
"}\n", | |
"sequence_features = {\n", | |
"    'Movie Names': tf.FixedLenSequenceFeature([], dtype=tf.string),\n", | |
"    'Movie Ratings': tf.FixedLenSequenceFeature([], dtype=tf.float32),\n", | |
"    'Movie Actors': tf.VarLenFeature(dtype=tf.string)\n", | |
"}\n", | |
"\n", | |
"# Extract features from serialized data\n", | |
"context_data, sequence_data = tf.parse_single_sequence_example(\n", | |
"    serialized=serialized_example,\n", | |
"    context_features=context_features,\n", | |
"    sequence_features=sequence_features)\n", | |
"\n", | |
"# Many tf.train functions use tf.train.QueueRunner, so the runner threads\n", | |
"# have to be started before we read. The coordinator lets us stop and join\n", | |
"# those threads again instead of leaving them running in the background.\n", | |
"coord = tf.train.Coordinator()\n", | |
"threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n", | |
"try:\n", | |
"    # Fetch everything in ONE run call: each separate run/eval executes\n", | |
"    # reader.read again and therefore consumes another record, so evaluating\n", | |
"    # the tensors one by one would mix features from different reads.\n", | |
"    context_values, sequence_values = sess.run([context_data, sequence_data])\n", | |
"finally:\n", | |
"    coord.request_stop()\n", | |
"    coord.join(threads)\n", | |
"\n", | |
"# Print features\n", | |
"print('Context:')\n", | |
"for name, value in context_values.items():\n", | |
"    print('{}: {}'.format(name, value))\n", | |
"\n", | |
"print('\\nData')\n", | |
"for name, value in sequence_values.items():\n", | |
"    print('{}: {}'.format(name, value))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment