@graham-thomson
Created April 15, 2021 17:45
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import tensorboard\n",
"import tensorflow as tf\n",
"import tensorflow.feature_column as fc\n",
"import datetime as dt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"data = pd.read_csv(\n",
" \"https://gist.githubusercontent.com/nstokoe/7d4717e96c21b8ad04ec91f361b000cb/raw/bf95a2e30fceb9f2ae990eac8379fc7d844a0196/weight-height.csv\", \n",
" dtype={\n",
" \"Gender\": str,\n",
" \"Height\": np.float64,\n",
" \"Weight\": np.float64\n",
" }\n",
")\n",
"\n",
"data[\"Gender\"] = data[\"Gender\"].apply(lambda x: 1.0 if x == \"Male\" else 0.0)\n",
"numeric_cols = [\"Height\", \"Weight\"]\n",
"for col in numeric_cols:\n",
" data[col] = StandardScaler().fit_transform(data[[col]])\n",
"\n",
"\n",
"train, test = train_test_split(data, test_size=0.33)\n",
"\n",
"x_train = train.drop(['Gender'],axis=1)\n",
"y_train = train['Gender']\n",
"\n",
"x_test = test.drop(['Gender'],axis=1)\n",
"y_test = test['Gender']\n",
"\n",
"Height = fc.numeric_column('Height')\n",
"Weight = fc.numeric_column('Weight')\n",
"feature_cols = [Height, Weight]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext tensorboard"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"BATCH_SIZE = 10 # 10\n",
"LEARNING_RATE = 0.01 # 0.01\n",
"EPOCHS = 2500\n",
"SHUFFLE_SIZE = 10000\n",
"\n",
"def train_input_fn():\n",
" dataset = tf.data.Dataset.from_tensor_slices((dict(x_train), y_train))\n",
" dataset = dataset.shuffle(SHUFFLE_SIZE).repeat().batch(BATCH_SIZE)\n",
" return dataset\n",
"\n",
"def eval_input_fn():\n",
" dataset = tf.data.Dataset.from_tensor_slices((dict(x_test), y_test))\n",
" return dataset.shuffle(SHUFFLE_SIZE).repeat().batch(BATCH_SIZE)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Using config: {'_model_dir': './models/', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
"graph_options {\n",
" rewrite_options {\n",
" meta_optimizer_iterations: ONE\n",
" }\n",
"}\n",
", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n",
"WARNING:tensorflow:From /Users/grahamthomson/VirtualEnvs/data_science/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py:1666: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"If using Keras pass *_constraint arguments to layers.\n",
"WARNING:tensorflow:From /Users/grahamthomson/VirtualEnvs/data_science/lib/python3.6/site-packages/tensorflow/python/training/training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.\n",
"INFO:tensorflow:Calling model_fn.\n",
"WARNING:tensorflow:Layer dnn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n",
"\n",
"If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
"\n",
"To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
"\n",
"INFO:tensorflow:Done calling model_fn.\n",
"INFO:tensorflow:Create CheckpointSaverHook.\n",
"INFO:tensorflow:Graph was finalized.\n",
"INFO:tensorflow:Restoring parameters from ./models/model.ckpt-5000\n",
"WARNING:tensorflow:From /Users/grahamthomson/VirtualEnvs/data_science/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1077: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use standard file utilities to get mtimes.\n",
"INFO:tensorflow:Running local_init_op.\n",
"INFO:tensorflow:Done running local_init_op.\n",
"INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 5000...\n",
"INFO:tensorflow:Saving checkpoints for 5000 into ./models/model.ckpt.\n",
"INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 5000...\n",
"INFO:tensorflow:loss = 0.15847513, step = 5000\n",
"INFO:tensorflow:global_step/sec: 297.143\n",
"INFO:tensorflow:loss = 0.1420708, step = 5100 (0.338 sec)\n",
"INFO:tensorflow:global_step/sec: 755.852\n",
"INFO:tensorflow:loss = 0.30615705, step = 5200 (0.132 sec)\n",
"INFO:tensorflow:global_step/sec: 719.777\n",
"INFO:tensorflow:loss = 0.18217091, step = 5300 (0.139 sec)\n",
"INFO:tensorflow:global_step/sec: 717.942\n",
"INFO:tensorflow:loss = 0.07128137, step = 5400 (0.139 sec)\n",
"INFO:tensorflow:global_step/sec: 779.405\n",
"INFO:tensorflow:loss = 0.22260666, step = 5500 (0.128 sec)\n",
"INFO:tensorflow:global_step/sec: 735.051\n",
"INFO:tensorflow:loss = 0.41146716, step = 5600 (0.136 sec)\n",
"INFO:tensorflow:global_step/sec: 580.596\n",
"INFO:tensorflow:loss = 0.04259679, step = 5700 (0.174 sec)\n",
"INFO:tensorflow:global_step/sec: 636.484\n",
"INFO:tensorflow:loss = 0.27729812, step = 5800 (0.155 sec)\n",
"INFO:tensorflow:global_step/sec: 794.376\n",
"INFO:tensorflow:loss = 0.16201207, step = 5900 (0.126 sec)\n",
"INFO:tensorflow:global_step/sec: 816.06\n",
"INFO:tensorflow:loss = 0.332717, step = 6000 (0.123 sec)\n",
"INFO:tensorflow:global_step/sec: 798.396\n",
"INFO:tensorflow:loss = 0.1754078, step = 6100 (0.126 sec)\n",
"INFO:tensorflow:global_step/sec: 746.353\n",
"INFO:tensorflow:loss = 0.043423492, step = 6200 (0.134 sec)\n",
"INFO:tensorflow:global_step/sec: 635.521\n",
"INFO:tensorflow:loss = 0.21691677, step = 6300 (0.157 sec)\n",
"INFO:tensorflow:global_step/sec: 533.812\n",
"INFO:tensorflow:loss = 0.15688688, step = 6400 (0.193 sec)\n",
"INFO:tensorflow:global_step/sec: 445.867\n",
"INFO:tensorflow:loss = 0.1762602, step = 6500 (0.218 sec)\n",
"INFO:tensorflow:global_step/sec: 597.133\n",
"INFO:tensorflow:loss = 0.35011774, step = 6600 (0.167 sec)\n",
"INFO:tensorflow:global_step/sec: 678.522\n",
"INFO:tensorflow:loss = 0.18194741, step = 6700 (0.148 sec)\n",
"INFO:tensorflow:global_step/sec: 868.713\n",
"INFO:tensorflow:loss = 0.09460559, step = 6800 (0.115 sec)\n",
"INFO:tensorflow:global_step/sec: 720.434\n",
"INFO:tensorflow:loss = 0.06633061, step = 6900 (0.139 sec)\n",
"INFO:tensorflow:global_step/sec: 811.959\n",
"INFO:tensorflow:loss = 0.28661516, step = 7000 (0.123 sec)\n",
"INFO:tensorflow:global_step/sec: 822.753\n",
"INFO:tensorflow:loss = 0.15061274, step = 7100 (0.121 sec)\n",
"INFO:tensorflow:global_step/sec: 723.878\n",
"INFO:tensorflow:loss = 0.14628488, step = 7200 (0.138 sec)\n",
"INFO:tensorflow:global_step/sec: 772.749\n",
"INFO:tensorflow:loss = 0.42421284, step = 7300 (0.129 sec)\n",
"INFO:tensorflow:global_step/sec: 877.101\n",
"INFO:tensorflow:loss = 0.07682868, step = 7400 (0.114 sec)\n",
"INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 7500...\n",
"INFO:tensorflow:Saving checkpoints for 7500 into ./models/model.ckpt.\n",
"INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 7500...\n",
"INFO:tensorflow:Loss for final step: 0.06063248.\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x140609748>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"optimizer_adam= tf.optimizers.Adam(learning_rate=LEARNING_RATE)\n",
"\n",
"\n",
"model=tf.estimator.DNNClassifier(\n",
" [30, 10],\n",
" model_dir=\"./models/\",\n",
" n_classes=2,\n",
" feature_columns=feature_cols, \n",
" optimizer=optimizer_adam,\n",
" config=tf.estimator.RunConfig().replace(save_summary_steps=10)\n",
")\n",
"\n",
"model.train(input_fn=lambda: train_input_fn(), steps=EPOCHS)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Calling model_fn.\n",
"WARNING:tensorflow:Layer dnn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n",
"\n",
"If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
"\n",
"To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
"\n",
"INFO:tensorflow:Done calling model_fn.\n",
"INFO:tensorflow:Starting evaluation at 2021-04-15T13:42:37Z\n",
"INFO:tensorflow:Graph was finalized.\n",
"INFO:tensorflow:Restoring parameters from ./models/model.ckpt-7500\n",
"INFO:tensorflow:Running local_init_op.\n",
"INFO:tensorflow:Done running local_init_op.\n",
"INFO:tensorflow:Evaluation [1/1]\n",
"INFO:tensorflow:Inference Time : 0.70016s\n",
"INFO:tensorflow:Finished evaluation at 2021-04-15-13:42:37\n",
"INFO:tensorflow:Saving dict for global step 7500: accuracy = 0.9, accuracy_baseline = 0.8, auc = 0.96875, auc_precision_recall = 0.99247503, average_loss = 0.22374408, global_step = 7500, label/mean = 0.8, loss = 0.22374408, precision = 0.8888889, prediction/mean = 0.8690305, recall = 1.0\n",
"INFO:tensorflow:Saving 'checkpoint_path' summary for global step 7500: ./models/model.ckpt-7500\n"
]
},
{
"data": {
"text/plain": [
"{'accuracy': 0.9,\n",
" 'accuracy_baseline': 0.8,\n",
" 'auc': 0.96875,\n",
" 'auc_precision_recall': 0.99247503,\n",
" 'average_loss': 0.22374408,\n",
" 'label/mean': 0.8,\n",
" 'loss': 0.22374408,\n",
" 'precision': 0.8888889,\n",
" 'prediction/mean': 0.8690305,\n",
" 'recall': 1.0,\n",
" 'global_step': 7500}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eval_results = model.evaluate(input_fn=eval_input_fn, steps=1)\n",
"eval_results"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <iframe id=\"tensorboard-frame-840eba33f6972967\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
" </iframe>\n",
" <script>\n",
" (function() {\n",
" const frame = document.getElementById(\"tensorboard-frame-840eba33f6972967\");\n",
" const url = new URL(\"/\", window.location);\n",
" url.port = 6008;\n",
" frame.src = url;\n",
" })();\n",
" </script>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%tensorboard --logdir models"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9181818181818182"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"lr = LogisticRegression().fit(x_train, y_train)\n",
"accuracy_score(y_test, lr.predict(x_test))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# g = {\"Height\": [72.5], \"Weight\": [180]}\n",
"# l = [1.0]\n",
"\n",
"# def pred_input_fn():\n",
"# dataset = tf.data.Dataset.from_tensor_slices((dict(g), l))\n",
"# return dataset\n",
"\n",
"# dir(model.predict(input_fn=pred_input_fn))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "data_science (venv)",
"language": "python",
"name": "data_science"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}