tgamauf/write_tfrecord_with_sequenceexample.ipynb

## write_tfrecord_with_sequenceexample.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Favorites': ['Majesty Rose', 'Savannah Outen', 'One Direction'], 'Locale': 'pt_BR', 'Age': 19, 'Data': [{'Movie Name': 'The Shawshank Redemption', 'Movie Rating': 9.0, 'Actors': ['Tim Robbins', 'Morgan Freeman']}, {'Movie Name': 'Fight Club', 'Movie Rating': 9.7, 'Actors': ['Brad Pitt', 'Edward Norton', 'Helena Bonham Carter']}]}\n"
     ]
    }
   ],
   "source": [
    "# Toy customer record used by the rest of the notebook\n",
    "data = {\n",
    "    # Context: values that describe the customer as a whole\n",
    "    'Locale': 'pt_BR',\n",
    "    'Age': 19,\n",
    "    'Favorites': [\n",
    "        'Majesty Rose',\n",
    "        'Savannah Outen',\n",
    "        'One Direction',\n",
    "    ],\n",
    "    # Data: one entry per movie, in order\n",
    "    'Data': [\n",
    "        {\n",
    "            'Movie Name': 'The Shawshank Redemption',\n",
    "            'Movie Rating': 9.0,\n",
    "            'Actors': ['Tim Robbins', 'Morgan Freeman'],\n",
    "        },\n",
    "        {\n",
    "            'Movie Name': 'Fight Club',\n",
    "            'Movie Rating': 9.7,\n",
    "            'Actors': [\n",
    "                'Brad Pitt',\n",
    "                'Edward Norton',\n",
    "                'Helena Bonham Carter',\n",
    "            ],\n",
    "        },\n",
    "    ],\n",
    "}\n",
    "\n",
    "print(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "context {\n",
      "  feature {\n",
      "    key: \"Age\"\n",
      "    value {\n",
      "      int64_list {\n",
      "        value: 19\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "  feature {\n",
      "    key: \"Favorites\"\n",
      "    value {\n",
      "      bytes_list {\n",
      "        value: \"Majesty Rose\"\n",
      "        value: \"Savannah Outen\"\n",
      "        value: \"One Direction\"\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "  feature {\n",
      "    key: \"Locale\"\n",
      "    value {\n",
      "      bytes_list {\n",
      "        value: \"pt_BR\"\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "}\n",
      "feature_lists {\n",
      "  feature_list {\n",
      "    key: \"Movie Actors\"\n",
      "    value {\n",
      "      feature {\n",
      "        bytes_list {\n",
      "          value: \"Tim Robbins\"\n",
      "          value: \"Morgan Freeman\"\n",
      "        }\n",
      "      }\n",
      "      feature {\n",
      "        bytes_list {\n",
      "          value: \"Brad Pitt\"\n",
      "          value: \"Edward Norton\"\n",
      "          value: \"Helena Bonham Carter\"\n",
      "        }\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "  feature_list {\n",
      "    key: \"Movie Names\"\n",
      "    value {\n",
      "      feature {\n",
      "        bytes_list {\n",
      "          value: \"The Shawshank Redemption\"\n",
      "        }\n",
      "      }\n",
      "      feature {\n",
      "        bytes_list {\n",
      "          value: \"Fight Club\"\n",
      "        }\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "  feature_list {\n",
      "    key: \"Movie Ratings\"\n",
      "    value {\n",
      "      feature {\n",
      "        float_list {\n",
      "          value: 9.0\n",
      "        }\n",
      "      }\n",
      "      feature {\n",
      "        float_list {\n",
      "          value: 9.699999809265137\n",
      "        }\n",
      "      }\n",
      "    }\n",
      "  }\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "def _bytes_feature(texts):\n",
    "    \"\"\"Return a Feature whose bytes_list holds the UTF-8 encoding of each str in texts.\"\"\"\n",
    "    encoded = [text.encode('utf-8') for text in texts]\n",
    "    return tf.train.Feature(bytes_list=tf.train.BytesList(value=encoded))\n",
    "\n",
    "\n",
    "def _int64_feature(numbers):\n",
    "    \"\"\"Return a Feature whose int64_list holds the given list of ints.\"\"\"\n",
    "    return tf.train.Feature(int64_list=tf.train.Int64List(value=numbers))\n",
    "\n",
    "\n",
    "def _float_feature(numbers):\n",
    "    \"\"\"Return a Feature whose float_list holds the given list of floats.\"\"\"\n",
    "    return tf.train.Feature(float_list=tf.train.FloatList(value=numbers))\n",
    "\n",
    "\n",
    "# Context part of the example: one Feature per customer-level field\n",
    "customer = tf.train.Features(feature={\n",
    "    'Locale': _bytes_feature([data['Locale']]),\n",
    "    'Age': _int64_feature([data['Age']]),\n",
    "    'Favorites': _bytes_feature(data['Favorites'])\n",
    "})\n",
    "\n",
    "# Sequence part: for every field, one Feature per movie (same order as data['Data'])\n",
    "names_features = [\n",
    "    _bytes_feature([movie['Movie Name']]) for movie in data['Data']]\n",
    "ratings_features = [\n",
    "    _float_feature([movie['Movie Rating']]) for movie in data['Data']]\n",
    "actors_features = [\n",
    "    _bytes_feature(movie['Actors']) for movie in data['Data']]\n",
    "\n",
    "# Each list of Features becomes a FeatureList, keyed by its name\n",
    "movies = tf.train.FeatureLists(feature_list={\n",
    "    'Movie Names': tf.train.FeatureList(feature=names_features),\n",
    "    'Movie Ratings': tf.train.FeatureList(feature=ratings_features),\n",
    "    'Movie Actors': tf.train.FeatureList(feature=actors_features)\n",
    "})\n",
    "\n",
    "# Combine context and feature lists into the SequenceExample\n",
    "example = tf.train.SequenceExample(context=customer,\n",
    "                                   feature_lists=movies)\n",
    "\n",
    "print(example)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the SequenceExample as the only record of a TFRecord file\n",
    "with tf.python_io.TFRecordWriter('customer_1.tfrecord') as record_writer:\n",
    "    serialized_record = example.SerializeToString()\n",
    "    record_writer.write(serialized_record)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Context:\n",
      "Favorites: SparseTensorValue(indices=array([[0],\n",
      "       [1],\n",
      "       [2]]), values=array([b'Majesty Rose', b'Savannah Outen', b'One Direction'], dtype=object), dense_shape=array([3]))\n",
      "Locale: b'pt_BR'\n",
      "Age: 19\n",
      "\n",
      "Data\n",
      "Movie Names: [b'The Shawshank Redemption' b'Fight Club']\n",
      "Movie Actors: SparseTensorValue(indices=array([[0, 0],\n",
      "       [0, 1],\n",
      "       [1, 0],\n",
      "       [1, 1],\n",
      "       [1, 2]]), values=array([b'Tim Robbins', b'Morgan Freeman', b'Brad Pitt', b'Edward Norton',\n",
      "       b'Helena Bonham Carter'], dtype=object), dense_shape=array([2, 3]))\n",
      "Movie Ratings: [9.  9.7]\n"
     ]
    }
   ],
   "source": [
    "# Read the record back and print the parsed features.\n",
    "# The input pipeline is built in a fresh graph so that re-running this cell\n",
    "# does not add another reader and queue runner to the default graph.\n",
    "with tf.Graph().as_default():\n",
    "    # Read TFRecord file\n",
    "    reader = tf.TFRecordReader()\n",
    "    filename_queue = tf.train.string_input_producer(['customer_1.tfrecord'])\n",
    "\n",
    "    _, serialized_example = reader.read(filename_queue)\n",
    "\n",
    "    # Define features\n",
    "    context_features = {\n",
    "        'Locale': tf.FixedLenFeature([], dtype=tf.string),\n",
    "        'Age': tf.FixedLenFeature([], dtype=tf.int64),\n",
    "        'Favorites': tf.VarLenFeature(dtype=tf.string)\n",
    "    }\n",
    "    sequence_features = {\n",
    "        'Movie Names': tf.FixedLenSequenceFeature([], dtype=tf.string),\n",
    "        'Movie Ratings': tf.FixedLenSequenceFeature([], dtype=tf.float32),\n",
    "        'Movie Actors': tf.VarLenFeature(dtype=tf.string)\n",
    "    }\n",
    "\n",
    "    # Extract features from serialized data\n",
    "    context_data, sequence_data = tf.parse_single_sequence_example(\n",
    "        serialized=serialized_example,\n",
    "        context_features=context_features,\n",
    "        sequence_features=sequence_features)\n",
    "\n",
    "    with tf.Session() as sess:\n",
    "        # string_input_producer is fed by a tf.train.QueueRunner thread, so\n",
    "        # it has to be started before we read; the coordinator lets us stop\n",
    "        # and join that thread again instead of leaking it\n",
    "        coord = tf.train.Coordinator()\n",
    "        threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n",
    "        try:\n",
    "            # Fetch everything in ONE run call: each separate run()/eval()\n",
    "            # executes reader.read again and would parse another record,\n",
    "            # so the printed features would not belong to the same record\n",
    "            context_values, sequence_values = sess.run(\n",
    "                [context_data, sequence_data])\n",
    "        finally:\n",
    "            coord.request_stop()\n",
    "            coord.join(threads)\n",
    "\n",
    "# Print features\n",
    "print('Context:')\n",
    "for name, value in context_values.items():\n",
    "    print('{}: {}'.format(name, value))\n",
    "\n",
    "print('\\nData')\n",
    "for name, value in sequence_values.items():\n",
    "    print('{}: {}'.format(name, value))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import tensorflow as tf"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"{'Favorites': ['Majesty Rose', 'Savannah Outen', 'One Direction'], 'Locale': 'pt_BR', 'Age': 19, 'Data': [{'Movie Name': 'The Shawshank Redemption', 'Movie Rating': 9.0, 'Actors': ['Tim Robbins', 'Morgan Freeman']}, {'Movie Name': 'Fight Club', 'Movie Rating': 9.7, 'Actors': ['Brad Pitt', 'Edward Norton', 'Helena Bonham Carter']}]}\n"
	]
	}
	],
	"source": [
	"# Toy customer record used by the rest of the notebook\n",
	"data = {\n",
	"    # Context: values that describe the customer as a whole\n",
	"    'Locale': 'pt_BR',\n",
	"    'Age': 19,\n",
	"    'Favorites': [\n",
	"        'Majesty Rose',\n",
	"        'Savannah Outen',\n",
	"        'One Direction',\n",
	"    ],\n",
	"    # Data: one entry per movie, in order\n",
	"    'Data': [\n",
	"        {\n",
	"            'Movie Name': 'The Shawshank Redemption',\n",
	"            'Movie Rating': 9.0,\n",
	"            'Actors': ['Tim Robbins', 'Morgan Freeman'],\n",
	"        },\n",
	"        {\n",
	"            'Movie Name': 'Fight Club',\n",
	"            'Movie Rating': 9.7,\n",
	"            'Actors': [\n",
	"                'Brad Pitt',\n",
	"                'Edward Norton',\n",
	"                'Helena Bonham Carter',\n",
	"            ],\n",
	"        },\n",
	"    ],\n",
	"}\n",
	"\n",
	"print(data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"context {\n",
	" feature {\n",
	" key: \"Age\"\n",
	" value {\n",
	" int64_list {\n",
	" value: 19\n",
	" }\n",
	" }\n",
	" }\n",
	" feature {\n",
	" key: \"Favorites\"\n",
	" value {\n",
	" bytes_list {\n",
	" value: \"Majesty Rose\"\n",
	" value: \"Savannah Outen\"\n",
	" value: \"One Direction\"\n",
	" }\n",
	" }\n",
	" }\n",
	" feature {\n",
	" key: \"Locale\"\n",
	" value {\n",
	" bytes_list {\n",
	" value: \"pt_BR\"\n",
	" }\n",
	" }\n",
	" }\n",
	"}\n",
	"feature_lists {\n",
	" feature_list {\n",
	" key: \"Movie Actors\"\n",
	" value {\n",
	" feature {\n",
	" bytes_list {\n",
	" value: \"Tim Robbins\"\n",
	" value: \"Morgan Freeman\"\n",
	" }\n",
	" }\n",
	" feature {\n",
	" bytes_list {\n",
	" value: \"Brad Pitt\"\n",
	" value: \"Edward Norton\"\n",
	" value: \"Helena Bonham Carter\"\n",
	" }\n",
	" }\n",
	" }\n",
	" }\n",
	" feature_list {\n",
	" key: \"Movie Names\"\n",
	" value {\n",
	" feature {\n",
	" bytes_list {\n",
	" value: \"The Shawshank Redemption\"\n",
	" }\n",
	" }\n",
	" feature {\n",
	" bytes_list {\n",
	" value: \"Fight Club\"\n",
	" }\n",
	" }\n",
	" }\n",
	" }\n",
	" feature_list {\n",
	" key: \"Movie Ratings\"\n",
	" value {\n",
	" feature {\n",
	" float_list {\n",
	" value: 9.0\n",
	" }\n",
	" }\n",
	" feature {\n",
	" float_list {\n",
	" value: 9.699999809265137\n",
	" }\n",
	" }\n",
	" }\n",
	" }\n",
	"}\n",
	"\n"
	]
	}
	],
	"source": [
	"def _bytes_feature(texts):\n",
	"    \"\"\"Return a Feature whose bytes_list holds the UTF-8 encoding of each str in texts.\"\"\"\n",
	"    encoded = [text.encode('utf-8') for text in texts]\n",
	"    return tf.train.Feature(bytes_list=tf.train.BytesList(value=encoded))\n",
	"\n",
	"\n",
	"def _int64_feature(numbers):\n",
	"    \"\"\"Return a Feature whose int64_list holds the given list of ints.\"\"\"\n",
	"    return tf.train.Feature(int64_list=tf.train.Int64List(value=numbers))\n",
	"\n",
	"\n",
	"def _float_feature(numbers):\n",
	"    \"\"\"Return a Feature whose float_list holds the given list of floats.\"\"\"\n",
	"    return tf.train.Feature(float_list=tf.train.FloatList(value=numbers))\n",
	"\n",
	"\n",
	"# Context part of the example: one Feature per customer-level field\n",
	"customer = tf.train.Features(feature={\n",
	"    'Locale': _bytes_feature([data['Locale']]),\n",
	"    'Age': _int64_feature([data['Age']]),\n",
	"    'Favorites': _bytes_feature(data['Favorites'])\n",
	"})\n",
	"\n",
	"# Sequence part: for every field, one Feature per movie (same order as data['Data'])\n",
	"names_features = [\n",
	"    _bytes_feature([movie['Movie Name']]) for movie in data['Data']]\n",
	"ratings_features = [\n",
	"    _float_feature([movie['Movie Rating']]) for movie in data['Data']]\n",
	"actors_features = [\n",
	"    _bytes_feature(movie['Actors']) for movie in data['Data']]\n",
	"\n",
	"# Each list of Features becomes a FeatureList, keyed by its name\n",
	"movies = tf.train.FeatureLists(feature_list={\n",
	"    'Movie Names': tf.train.FeatureList(feature=names_features),\n",
	"    'Movie Ratings': tf.train.FeatureList(feature=ratings_features),\n",
	"    'Movie Actors': tf.train.FeatureList(feature=actors_features)\n",
	"})\n",
	"\n",
	"# Combine context and feature lists into the SequenceExample\n",
	"example = tf.train.SequenceExample(context=customer,\n",
	"                                   feature_lists=movies)\n",
	"\n",
	"print(example)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Persist the SequenceExample as the only record of a TFRecord file\n",
	"with tf.python_io.TFRecordWriter('customer_1.tfrecord') as record_writer:\n",
	"    serialized_record = example.SerializeToString()\n",
	"    record_writer.write(serialized_record)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Context:\n",
	"Favorites: SparseTensorValue(indices=array([[0],\n",
	" [1],\n",
	" [2]]), values=array([b'Majesty Rose', b'Savannah Outen', b'One Direction'], dtype=object), dense_shape=array([3]))\n",
	"Locale: b'pt_BR'\n",
	"Age: 19\n",
	"\n",
	"Data\n",
	"Movie Names: [b'The Shawshank Redemption' b'Fight Club']\n",
	"Movie Actors: SparseTensorValue(indices=array([[0, 0],\n",
	" [0, 1],\n",
	" [1, 0],\n",
	" [1, 1],\n",
	" [1, 2]]), values=array([b'Tim Robbins', b'Morgan Freeman', b'Brad Pitt', b'Edward Norton',\n",
	" b'Helena Bonham Carter'], dtype=object), dense_shape=array([2, 3]))\n",
	"Movie Ratings: [9. 9.7]\n"
	]
	}
	],
	"source": [
	"# Read the record back and print the parsed features.\n",
	"# The input pipeline is built in a fresh graph so that re-running this cell\n",
	"# does not add another reader and queue runner to the default graph.\n",
	"with tf.Graph().as_default():\n",
	"    # Read TFRecord file\n",
	"    reader = tf.TFRecordReader()\n",
	"    filename_queue = tf.train.string_input_producer(['customer_1.tfrecord'])\n",
	"\n",
	"    _, serialized_example = reader.read(filename_queue)\n",
	"\n",
	"    # Define features\n",
	"    context_features = {\n",
	"        'Locale': tf.FixedLenFeature([], dtype=tf.string),\n",
	"        'Age': tf.FixedLenFeature([], dtype=tf.int64),\n",
	"        'Favorites': tf.VarLenFeature(dtype=tf.string)\n",
	"    }\n",
	"    sequence_features = {\n",
	"        'Movie Names': tf.FixedLenSequenceFeature([], dtype=tf.string),\n",
	"        'Movie Ratings': tf.FixedLenSequenceFeature([], dtype=tf.float32),\n",
	"        'Movie Actors': tf.VarLenFeature(dtype=tf.string)\n",
	"    }\n",
	"\n",
	"    # Extract features from serialized data\n",
	"    context_data, sequence_data = tf.parse_single_sequence_example(\n",
	"        serialized=serialized_example,\n",
	"        context_features=context_features,\n",
	"        sequence_features=sequence_features)\n",
	"\n",
	"    with tf.Session() as sess:\n",
	"        # string_input_producer is fed by a tf.train.QueueRunner thread, so\n",
	"        # it has to be started before we read; the coordinator lets us stop\n",
	"        # and join that thread again instead of leaking it\n",
	"        coord = tf.train.Coordinator()\n",
	"        threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n",
	"        try:\n",
	"            # Fetch everything in ONE run call: each separate run()/eval()\n",
	"            # executes reader.read again and would parse another record,\n",
	"            # so the printed features would not belong to the same record\n",
	"            context_values, sequence_values = sess.run(\n",
	"                [context_data, sequence_data])\n",
	"        finally:\n",
	"            coord.request_stop()\n",
	"            coord.join(threads)\n",
	"\n",
	"# Print features\n",
	"print('Context:')\n",
	"for name, value in context_values.items():\n",
	"    print('{}: {}'.format(name, value))\n",
	"\n",
	"print('\\nData')\n",
	"for name, value in sequence_values.items():\n",
	"    print('{}: {}'.format(name, value))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}