RottenFruits/iris_3class.ipynb

## iris_3class.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://fabo.gitbooks.io/tensorflow-docs/model_logstic/tensorflow_three_classification_tensorboard.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "\"\"\"\n",
    "irisデータセットを使った3クラス分類\n",
    "\"\"\"\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "### データの準備\n",
    "# データセットの読み込み\n",
    "dataset = pd.read_csv(\"data/iris.csv\")\n",
    "# データセットの順序をランダムに並べ替える\n",
    "dataset = dataset.reindex(np.random.permutation(dataset.index)).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_labels(dataset):\n",
    "    \"\"\"ラベル(正解データ)を1ofKベクトルに変換する\"\"\"\n",
    "    raw_labels = dataset.iloc[:,4]\n",
    "    labels = []\n",
    "    for l in raw_labels:\n",
    "        if l == \"Iris-setosa\":\n",
    "            labels.append([1.0,0.0,0.0])\n",
    "        elif l == \"Iris-versicolor\":\n",
    "            labels.append([0.0,1.0,0.0])\n",
    "        elif l == \"Iris-virginica\":\n",
    "            labels.append([0.0,0.0,1.0])\n",
    "    return np.array(labels)\n",
    "\n",
    "def get_data(dataset):\n",
    "    \"\"\"データセットをnparrayに変換する\"\"\"\n",
    "    raw_data = dataset.ix[:, :dataset.shape[1] - 1]\n",
    "    return np.array(raw_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ogawashouhei/.pyenv/versions/anaconda3-4.0.0/envs/tf02/lib/python3.5/site-packages/ipykernel_launcher.py:16: DeprecationWarning: \n",
      ".ix is deprecated. Please use\n",
      ".loc for label based indexing or\n",
      ".iloc for positional indexing\n",
      "\n",
      "See the documentation here:\n",
      "http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated\n",
      "  app.launch_new_instance()\n"
     ]
    }
   ],
   "source": [
    "with tf.name_scope('dataset'):\n",
    "    # ラベル\n",
    "    labels = get_labels(dataset)\n",
    "    # データ\n",
    "    data = get_data(dataset)\n",
    "    # 訓練データとテストデータに分割する\n",
    "    # 訓練用データ\n",
    "    train_labels = labels[:120]\n",
    "    train_data = data[:120]\n",
    "    # テスト用データ\n",
    "    test_labels = labels[120:]\n",
    "    test_data = data[120:]\n",
    "\n",
    "\n",
    "\n",
    "### モデルをTensor形式で実装\n",
    "\n",
    "with tf.name_scope('ph'):\n",
    "    # ラベルを格納するPlaceholder\n",
    "    t = tf.placeholder(tf.float32, shape = (None, 3), name = \"y\")\n",
    "    # データを格納するPlaceholder\n",
    "    X = tf.placeholder(tf.float32, shape=(None, 4), name = \"X\")\n",
    "\n",
    "with tf.name_scope('hidden_layer1'):\n",
    "    # 隠れ層のノード数\n",
    "    node_num = 1024\n",
    "    w_hidden = tf.Variable(tf.truncated_normal([4, node_num]))\n",
    "    b_hidden = tf.Variable(tf.zeros([node_num]))\n",
    "    f_hidden = tf.matmul(X, w_hidden) + b_hidden\n",
    "    hidden_layer = tf.nn.relu(f_hidden)\n",
    "    tf.summary.histogram(\"Hidden_layer_wights\", w_hidden)\n",
    "    tf.summary.histogram(\"Hidden_layer_biases\", b_hidden)\n",
    "\n",
    "\n",
    "with tf.name_scope('output1'):\n",
    "    # 出力層\n",
    "    w_output = tf.Variable(tf.zeros([node_num,3]))\n",
    "    b_output = tf.Variable(tf.zeros([3]))\n",
    "    f_output = tf.matmul(hidden_layer, w_output) + b_output\n",
    "    p = tf.nn.softmax(f_output)\n",
    "    tf.summary.histogram(\"Output_layer_wights\", w_output)\n",
    "    tf.summary.histogram(\"Output_layer_wights\", b_output)\n",
    "\n",
    "\n",
    "\n",
    "with tf.name_scope('loss'):\n",
    "    # 誤差関数\n",
    "    cross_entropy = t * tf.log(p)\n",
    "    loss = -tf.reduce_mean(cross_entropy)\n",
    "    tf.summary.scalar('loss', loss)\n",
    "\n",
    "\n",
    "# トレーニングアルゴリズム\n",
    "# 勾配降下法 学習率0.001\n",
    "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n",
    "train_step = optimizer.minimize(loss)\n",
    "\n",
    "with tf.name_scope('accuracy'):\n",
    "    # モデルの予測と正解が一致しているか調べる\n",
    "    correct_pred = tf.equal(tf.argmax(p, 1), tf.argmax(t, 1))\n",
    "    # モデルの精度\n",
    "    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))\n",
    "    tf.summary.scalar('accuracy', accuracy)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step: 200\n",
      "[Train] cost: 0.123277, acc: 0.808333\n",
      "Step: 400\n",
      "[Train] cost: 0.058310, acc: 0.966667\n",
      "Step: 600\n",
      "[Train] cost: 0.051282, acc: 0.966667\n",
      "Step: 800\n",
      "[Train] cost: 0.046961, acc: 0.966667\n",
      "Step: 1000\n",
      "[Train] cost: 0.044013, acc: 0.950000\n",
      "Step: 1200\n",
      "[Train] cost: 0.041868, acc: 0.950000\n",
      "Step: 1400\n",
      "[Train] cost: 0.040235, acc: 0.950000\n",
      "Step: 1600\n",
      "[Train] cost: 0.038950, acc: 0.958333\n",
      "Step: 1800\n",
      "[Train] cost: 0.037911, acc: 0.958333\n",
      "Step: 2000\n",
      "[Train] cost: 0.037054, acc: 0.958333\n"
     ]
    }
   ],
   "source": [
    "### 学習の実行\n",
    "with tf.Session() as sess:\n",
    "    # ログの設定\n",
    "    summary = tf.summary.merge_all()\n",
    "\n",
    "    writer = tf.summary.FileWriter(\"iris_cassification_log\", sess.graph)\n",
    "    sess.run(tf.global_variables_initializer())\n",
    "\n",
    "    i = 0\n",
    "    for _ in range(2000):\n",
    "        i += 1\n",
    "        # トレーニング\n",
    "        sess.run(train_step, feed_dict={X:train_data,t:train_labels})\n",
    "        # 200ステップごとに精度を出力\n",
    "        if i % 200 == 0:\n",
    "            # コストと精度を出力\n",
    "            train_summary, train_loss, train_acc = sess.run([summary, loss, accuracy], feed_dict={X:train_data,t:train_labels})\n",
    "            writer.add_summary(train_summary, i)\n",
    "            print(\"Step: %d\" % i)\n",
    "            print(\"[Train] cost: %f, acc: %f\" % (train_loss, train_acc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "tensorboard --logdir /Users/ogawashouhei/Google\\ ドライブ/Study/tf/iris_cassification_log --port 6010"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"https://fabo.gitbooks.io/tensorflow-docs/model_logstic/tensorflow_three_classification_tensorboard.html"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# -- coding: utf-8 --\n",
	"\"\"\"\n",
	"irisデータセットを使った3クラス分類\n",
	"\"\"\"\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"import tensorflow as tf"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"### データの準備\n",
	"# データセットの読み込み\n",
	"dataset = pd.read_csv(\"data/iris.csv\")\n",
	"# データセットの順序をランダムに並べ替える\n",
	"dataset = dataset.reindex(np.random.permutation(dataset.index)).reset_index(drop=True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def get_labels(dataset):\n",
	" \"\"\"ラベル(正解データ)を1ofKベクトルに変換する\"\"\"\n",
	" raw_labels = dataset.iloc[:,4]\n",
	" labels = []\n",
	" for l in raw_labels:\n",
	" if l == \"Iris-setosa\":\n",
	" labels.append([1.0,0.0,0.0])\n",
	" elif l == \"Iris-versicolor\":\n",
	" labels.append([0.0,1.0,0.0])\n",
	" elif l == \"Iris-virginica\":\n",
	" labels.append([0.0,0.0,1.0])\n",
	" return np.array(labels)\n",
	"\n",
	"def get_data(dataset):\n",
	" \"\"\"データセットをnparrayに変換する\"\"\"\n",
	" raw_data = dataset.ix[:, :dataset.shape[1] - 1]\n",
	" return np.array(raw_data)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/ogawashouhei/.pyenv/versions/anaconda3-4.0.0/envs/tf02/lib/python3.5/site-packages/ipykernel_launcher.py:16: DeprecationWarning: \n",
	".ix is deprecated. Please use\n",
	".loc for label based indexing or\n",
	".iloc for positional indexing\n",
	"\n",
	"See the documentation here:\n",
	"http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated\n",
	" app.launch_new_instance()\n"
	]
	}
	],
	"source": [
	"with tf.name_scope('dataset'):\n",
	" # ラベル\n",
	" labels = get_labels(dataset)\n",
	" # データ\n",
	" data = get_data(dataset)\n",
	" # 訓練データとテストデータに分割する\n",
	" # 訓練用データ\n",
	" train_labels = labels[:120]\n",
	" train_data = data[:120]\n",
	" # テスト用データ\n",
	" test_labels = labels[120:]\n",
	" test_data = data[120:]\n",
	"\n",
	"\n",
	"\n",
	"### モデルをTensor形式で実装\n",
	"\n",
	"with tf.name_scope('ph'):\n",
	" # ラベルを格納するPlaceholder\n",
	" t = tf.placeholder(tf.float32, shape = (None, 3), name = \"y\")\n",
	" # データを格納するPlaceholder\n",
	" X = tf.placeholder(tf.float32, shape=(None, 4), name = \"X\")\n",
	"\n",
	"with tf.name_scope('hidden_layer1'):\n",
	" # 隠れ層のノード数\n",
	" node_num = 1024\n",
	" w_hidden = tf.Variable(tf.truncated_normal([4, node_num]))\n",
	" b_hidden = tf.Variable(tf.zeros([node_num]))\n",
	" f_hidden = tf.matmul(X, w_hidden) + b_hidden\n",
	" hidden_layer = tf.nn.relu(f_hidden)\n",
	" tf.summary.histogram(\"Hidden_layer_wights\", w_hidden)\n",
	" tf.summary.histogram(\"Hidden_layer_biases\", b_hidden)\n",
	"\n",
	"\n",
	"with tf.name_scope('output1'):\n",
	" # 出力層\n",
	" w_output = tf.Variable(tf.zeros([node_num,3]))\n",
	" b_output = tf.Variable(tf.zeros([3]))\n",
	" f_output = tf.matmul(hidden_layer, w_output) + b_output\n",
	" p = tf.nn.softmax(f_output)\n",
	" tf.summary.histogram(\"Output_layer_wights\", w_output)\n",
	" tf.summary.histogram(\"Output_layer_wights\", b_output)\n",
	"\n",
	"\n",
	"\n",
	"with tf.name_scope('loss'):\n",
	" # 誤差関数\n",
	" cross_entropy = t * tf.log(p)\n",
	" loss = -tf.reduce_mean(cross_entropy)\n",
	" tf.summary.scalar('loss', loss)\n",
	"\n",
	"\n",
	"# トレーニングアルゴリズム\n",
	"# 勾配降下法学習率0.001\n",
	"optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)\n",
	"train_step = optimizer.minimize(loss)\n",
	"\n",
	"with tf.name_scope('accuracy'):\n",
	" # モデルの予測と正解が一致しているか調べる\n",
	" correct_pred = tf.equal(tf.argmax(p, 1), tf.argmax(t, 1))\n",
	" # モデルの精度\n",
	" accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))\n",
	" tf.summary.scalar('accuracy', accuracy)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Step: 200\n",
	"[Train] cost: 0.123277, acc: 0.808333\n",
	"Step: 400\n",
	"[Train] cost: 0.058310, acc: 0.966667\n",
	"Step: 600\n",
	"[Train] cost: 0.051282, acc: 0.966667\n",
	"Step: 800\n",
	"[Train] cost: 0.046961, acc: 0.966667\n",
	"Step: 1000\n",
	"[Train] cost: 0.044013, acc: 0.950000\n",
	"Step: 1200\n",
	"[Train] cost: 0.041868, acc: 0.950000\n",
	"Step: 1400\n",
	"[Train] cost: 0.040235, acc: 0.950000\n",
	"Step: 1600\n",
	"[Train] cost: 0.038950, acc: 0.958333\n",
	"Step: 1800\n",
	"[Train] cost: 0.037911, acc: 0.958333\n",
	"Step: 2000\n",
	"[Train] cost: 0.037054, acc: 0.958333\n"
	]
	}
	],
	"source": [
	"### 学習の実行\n",
	"with tf.Session() as sess:\n",
	" # ログの設定\n",
	" summary = tf.summary.merge_all()\n",
	"\n",
	" writer = tf.summary.FileWriter(\"iris_cassification_log\", sess.graph)\n",
	" sess.run(tf.global_variables_initializer())\n",
	"\n",
	" i = 0\n",
	" for _ in range(2000):\n",
	" i += 1\n",
	" # トレーニング\n",
	" sess.run(train_step, feed_dict={X:train_data,t:train_labels})\n",
	" # 200ステップごとに精度を出力\n",
	" if i % 200 == 0:\n",
	" # コストと精度を出力\n",
	" train_summary, train_loss, train_acc = sess.run([summary, loss, accuracy], feed_dict={X:train_data,t:train_labels})\n",
	" writer.add_summary(train_summary, i)\n",
	" print(\"Step: %d\" % i)\n",
	" print(\"[Train] cost: %f, acc: %f\" % (train_loss, train_acc))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"%%bash\n",
	"tensorboard --logdir /Users/ogawashouhei/Google\\ ドライブ/Study/tf/iris_cassification_log --port 6010"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}