wstcpyt/decision_with_iris_datasets.ipynb

## decision_with_iris_datasets.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()  # load iris dataset\n",
    "x = iris.data\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "d = [{\"sepal_length\":row[0], \"sepal_width\":row[1], \"petal_length\":row[2], \"petal_width\":row[3]} for row in x]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(d) # construct dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"types\"] = y # assign types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.sample(frac=1.0) # random shuffle rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "      <th>types</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>6.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>5.5</td>\n",
       "      <td>2.3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.6</td>\n",
       "      <td>0.6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>6.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>5.6</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4.1</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     sepal_length  sepal_width  petal_length  petal_width  types\n",
       "138           6.0          3.0           4.8          1.8      2\n",
       "53            5.5          2.3           4.0          1.3      1\n",
       "43            5.0          3.5           1.6          0.6      0\n",
       "148           6.2          3.4           5.4          2.3      2\n",
       "88            5.6          3.0           4.1          1.3      1"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train test split, ratio = 0.8\n",
    "features = df[[\"sepal_length\",\"sepal_width\",\"petal_length\",\"petal_width\"]]\n",
    "types = df[\"types\"]\n",
    "train_features, test_features, train_types, test_types = train_test_split(features,types,train_size=0.8, random_state=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#scikit learn decision tree model trainig \n",
    "from sklearn import tree\n",
    "clf = tree.DecisionTreeClassifier()\n",
    "clf = clf.fit(train_features, train_types)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "prediction = clf.predict(test_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "       type0       1.00      1.00      1.00         7\n",
      "       type1       0.92      1.00      0.96        11\n",
      "       type2       1.00      0.92      0.96        12\n",
      "\n",
      "    accuracy                           0.97        30\n",
      "   macro avg       0.97      0.97      0.97        30\n",
      "weighted avg       0.97      0.97      0.97        30\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# evaluation for multi class classification\n",
    "from sklearn.metrics import classification_report\n",
    "print(classification_report(test_types, prediction, target_names=[\"type0\",\"type1\",\"type2\"]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"from sklearn import datasets\n",
	"from sklearn.model_selection import train_test_split"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"iris = datasets.load_iris() # load iris dataset\n",
	"x = iris.data\n",
	"y = iris.target"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"d = [{\"sepal_length\":row[0], \"sepal_width\":row[1], \"petal_length\":row[2], \"petal_width\":row[3]} for row in x]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.DataFrame(d) # construct dataframe"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"df[\"types\"] = y # assign types"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = df.sample(frac=1.0) # random shuffle rows"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>sepal_length</th>\n",
	" <th>sepal_width</th>\n",
	" <th>petal_length</th>\n",
	" <th>petal_width</th>\n",
	" <th>types</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>138</th>\n",
	" <td>6.0</td>\n",
	" <td>3.0</td>\n",
	" <td>4.8</td>\n",
	" <td>1.8</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>53</th>\n",
	" <td>5.5</td>\n",
	" <td>2.3</td>\n",
	" <td>4.0</td>\n",
	" <td>1.3</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>43</th>\n",
	" <td>5.0</td>\n",
	" <td>3.5</td>\n",
	" <td>1.6</td>\n",
	" <td>0.6</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>148</th>\n",
	" <td>6.2</td>\n",
	" <td>3.4</td>\n",
	" <td>5.4</td>\n",
	" <td>2.3</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>88</th>\n",
	" <td>5.6</td>\n",
	" <td>3.0</td>\n",
	" <td>4.1</td>\n",
	" <td>1.3</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" sepal_length sepal_width petal_length petal_width types\n",
	"138 6.0 3.0 4.8 1.8 2\n",
	"53 5.5 2.3 4.0 1.3 1\n",
	"43 5.0 3.5 1.6 0.6 0\n",
	"148 6.2 3.4 5.4 2.3 2\n",
	"88 5.6 3.0 4.1 1.3 1"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"# train test split, ratio = 0.8\n",
	"features = df[[\"sepal_length\",\"sepal_width\",\"petal_length\",\"petal_width\"]]\n",
	"types = df[\"types\"]\n",
	"train_features, test_features, train_types, test_types = train_test_split(features,types,train_size=0.8, random_state=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"#scikit learn decision tree model trainig \n",
	"from sklearn import tree\n",
	"clf = tree.DecisionTreeClassifier()\n",
	"clf = clf.fit(train_features, train_types)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"prediction = clf.predict(test_features)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" precision recall f1-score support\n",
	"\n",
	" type0 1.00 1.00 1.00 7\n",
	" type1 0.92 1.00 0.96 11\n",
	" type2 1.00 0.92 0.96 12\n",
	"\n",
	" accuracy 0.97 30\n",
	" macro avg 0.97 0.97 0.97 30\n",
	"weighted avg 0.97 0.97 0.97 30\n",
	"\n"
	]
	}
	],
	"source": [
	"# evaluation for multi class classification\n",
	"from sklearn.metrics import classification_report\n",
	"print(classification_report(test_types, prediction, target_names=[\"type0\",\"type1\",\"type2\"]))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}