yssymmt/lgb01_topmml.ipynb

## lgb01_topmml.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a677a16f",
   "metadata": {},
   "source": [
    "####PMMLモジュールのインストール"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "c0e876b0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting package metadata (current_repodata.json): ...working... done\n",
      "Solving environment: ...working... done\n",
      "\n",
      "## Package Plan ##\n",
      "\n",
      "  environment location: C:\\Users\\yourdirectory\\Anaconda3\n",
      "\n",
      "  added / updated specs:\n",
      "    - sklearn2pmml\n",
      "\n",
      "\n",
      "The following packages will be downloaded:\n",
      "\n",
      "    package                    |            build\n",
      "    ---------------------------|-----------------\n",
      "    conda-4.13.0               |   py39hcbf5309_1         1.0 MB  conda-forge\n",
      "    python_abi-3.9             |           2_cp39           4 KB  conda-forge\n",
      "    sklearn-pandas-2.2.0       |     pyhd8ed1ab_0          13 KB  conda-forge\n",
      "    sklearn2pmml-0.81.0        |     pyhd8ed1ab_0         5.9 MB  conda-forge\n",
      "    ------------------------------------------------------------\n",
      "                                           Total:         6.9 MB\n",
      "\n",
      "The following NEW packages will be INSTALLED:\n",
      "\n",
      "  python_abi         conda-forge/win-64::python_abi-3.9-2_cp39\n",
      "  sklearn-pandas     conda-forge/noarch::sklearn-pandas-2.2.0-pyhd8ed1ab_0\n",
      "  sklearn2pmml       conda-forge/noarch::sklearn2pmml-0.81.0-pyhd8ed1ab_0\n",
      "\n",
      "The following packages will be UPDATED:\n",
      "\n",
      "  conda              pkgs/main::conda-4.13.0-py39haa95532_0 --> conda-forge::conda-4.13.0-py39hcbf5309_1\n",
      "\n",
      "\n",
      "\n",
      "Downloading and Extracting Packages\n",
      "\n",
      "python_abi-3.9       | 4 KB      |            |   0% \n",
      "python_abi-3.9       | 4 KB      | ########## | 100% \n",
      "python_abi-3.9       | 4 KB      | ########## | 100% \n",
      "\n",
      "sklearn-pandas-2.2.0 | 13 KB     |            |   0% \n",
      "sklearn-pandas-2.2.0 | 13 KB     | ########## | 100% \n",
      "sklearn-pandas-2.2.0 | 13 KB     | ########## | 100% \n",
      "\n",
      "sklearn2pmml-0.81.0  | 5.9 MB    |            |   0% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    |            |   0% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | 1          |   2% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | 2          |   3% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | 6          |   6% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | #          |  11% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | #7         |  17% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | ##6        |  27% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | ###9       |  40% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | ######     |  61% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | ########8  |  89% \n",
      "sklearn2pmml-0.81.0  | 5.9 MB    | ########## | 100% \n",
      "\n",
      "conda-4.13.0         | 1.0 MB    |            |   0% \n",
      "conda-4.13.0         | 1.0 MB    | ########## | 100% \n",
      "conda-4.13.0         | 1.0 MB    | ########## | 100% \n",
      "Preparing transaction: ...working... done\n",
      "Verifying transaction: ...working... done\n",
      "Executing transaction: ...working... done\n",
      "\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "conda install -c conda-forge sklearn2pmml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18065645",
   "metadata": {},
   "source": [
    "####ライブラリの読み込み"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a5ace988",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "import teradatasql\n",
    "import teradatasqlalchemy\n",
    "from sklearn.model_selection import train_test_split\n",
    "import lightgbm as lgb \n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn2pmml import sklearn2pmml\n",
    "from sklearn2pmml.pipeline import PMMLPipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3ecde84a",
   "metadata": {},
   "source": [
    "####データフレームに変換"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "39c12785",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "      <th>targeto</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_length  sepal_width  petal_length  petal_width targeto\n",
       "0           5.1          3.5           1.4          0.2  setosa\n",
       "1           4.9          3.0           1.4          0.2  setosa\n",
       "2           4.7          3.2           1.3          0.2  setosa\n",
       "3           4.6          3.1           1.5          0.2  setosa\n",
       "4           5.0          3.6           1.4          0.2  setosa"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris = datasets.load_iris()\n",
    "df = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
    "df['target'] = iris.target_names[iris.target]\n",
    "df.columns = ['sepal_length','sepal_width','petal_length','petal_width','targeto']\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21ceeefc",
   "metadata": {},
   "source": [
    "####ユニーク番号を付与、列順入替"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "ac6d8cce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>serial_num</th>\n",
       "      <th>targeto</th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>setosa</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>setosa</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   serial_num targeto  sepal_length  sepal_width  petal_length  petal_width\n",
       "0           1  setosa           5.1          3.5           1.4          0.2\n",
       "1           2  setosa           4.9          3.0           1.4          0.2\n",
       "2           3  setosa           4.7          3.2           1.3          0.2\n",
       "3           4  setosa           4.6          3.1           1.5          0.2\n",
       "4           5  setosa           5.0          3.6           1.4          0.2"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "serial_num = pd.RangeIndex(start=1, stop=len(df.index) + 1, step=1)\n",
    "df['serial_num'] = serial_num\n",
    "df = df.loc[:,['serial_num','targeto','sepal_length','sepal_width','petal_length','petal_width']]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d03e0a8a",
   "metadata": {},
   "source": [
    "####Teradataへの接続、sqlalchemy エンジンを作成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0e5e2ee2",
   "metadata": {},
   "outputs": [],
   "source": [
    "host = \"123.456.789.0\"\n",
    "user = \"jumbo\"\n",
    "password = \"mambo\"\n",
    "connstr = \"teradatasql://{user}:{password}@{host}\".format(host=host, user=user, password=password)\n",
    "engine = create_engine(connstr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ba3a7f2",
   "metadata": {},
   "source": [
    "####空テーブル作成、元データ用 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b2a6ebb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "with engine.connect() as conn:\n",
    "  x1 = pd.read_sql(\"\"\"\n",
    "  create multiset table jumbo.lgb01_iris (\n",
    "  serial_num integer, \n",
    "  targeto varchar(10) character set unicode, \n",
    "  sepal_length float, \n",
    "  sepal_width float, \n",
    "  petal_length float, \n",
    "  petal_width float \n",
    "  ) primary index (serial_num) \n",
    "  \"\"\", conn)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae1aeddf",
   "metadata": {},
   "source": [
    "####元データの格納"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "59fbe566",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.to_sql('lgb01_iris',engine,if_exists='append',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ee90be4",
   "metadata": {},
   "source": [
    "####格納を確認 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "12ca1153",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>serial_num</th>\n",
       "      <th>targeto</th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>setosa</td>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>setosa</td>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>setosa</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>146</td>\n",
       "      <td>virginica</td>\n",
       "      <td>6.7</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>147</td>\n",
       "      <td>virginica</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>148</td>\n",
       "      <td>virginica</td>\n",
       "      <td>6.5</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.2</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>149</td>\n",
       "      <td>virginica</td>\n",
       "      <td>6.2</td>\n",
       "      <td>3.4</td>\n",
       "      <td>5.4</td>\n",
       "      <td>2.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>150</td>\n",
       "      <td>virginica</td>\n",
       "      <td>5.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     serial_num    targeto  sepal_length  sepal_width  petal_length  \\\n",
       "0             1     setosa           5.1          3.5           1.4   \n",
       "1             2     setosa           4.9          3.0           1.4   \n",
       "2             3     setosa           4.7          3.2           1.3   \n",
       "3             4     setosa           4.6          3.1           1.5   \n",
       "4             5     setosa           5.0          3.6           1.4   \n",
       "..          ...        ...           ...          ...           ...   \n",
       "145         146  virginica           6.7          3.0           5.2   \n",
       "146         147  virginica           6.3          2.5           5.0   \n",
       "147         148  virginica           6.5          3.0           5.2   \n",
       "148         149  virginica           6.2          3.4           5.4   \n",
       "149         150  virginica           5.9          3.0           5.1   \n",
       "\n",
       "     petal_width  \n",
       "0            0.2  \n",
       "1            0.2  \n",
       "2            0.2  \n",
       "3            0.2  \n",
       "4            0.2  \n",
       "..           ...  \n",
       "145          2.3  \n",
       "146          1.9  \n",
       "147          2.0  \n",
       "148          2.3  \n",
       "149          1.8  \n",
       "\n",
       "[150 rows x 6 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with engine.connect() as conn:\n",
    "  x2 = pd.read_sql(\"\"\"\n",
    "  select * from  jumbo.lgb01_iris order by 1 \n",
    "  \"\"\", conn)\n",
    "x2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ded7ca2b",
   "metadata": {},
   "source": [
    "####説明変数と結果変数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b2121799",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_length</th>\n",
       "      <th>sepal_width</th>\n",
       "      <th>petal_length</th>\n",
       "      <th>petal_width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_length  sepal_width  petal_length  petal_width\n",
       "0           5.1          3.5           1.4          0.2\n",
       "1           4.9          3.0           1.4          0.2\n",
       "2           4.7          3.2           1.3          0.2\n",
       "3           4.6          3.1           1.5          0.2\n",
       "4           5.0          3.6           1.4          0.2"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = x2.drop(['serial_num','targeto'], axis=1)\n",
    "y = x2['targeto'].values\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3ce30c3e",
   "metadata": {},
   "source": [
    "####学習用、評価用への分割"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "67dbcc4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e6f92255",
   "metadata": {},
   "source": [
    "####学習"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "d0df4d2c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LGBMClassifier()"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = lgb.LGBMClassifier() \n",
    "model.fit(X_train, y_train) "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf6270aa",
   "metadata": {},
   "source": [
    "####評価データの予測"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "752d7448",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['setosa', 'versicolor', 'versicolor', 'setosa', 'virginica',\n",
       "       'versicolor', 'virginica', 'setosa', 'setosa', 'virginica',\n",
       "       'versicolor', 'setosa', 'virginica', 'versicolor', 'versicolor',\n",
       "       'setosa', 'versicolor', 'versicolor', 'setosa', 'setosa',\n",
       "       'versicolor', 'versicolor', 'virginica', 'setosa', 'virginica',\n",
       "       'versicolor', 'setosa', 'setosa', 'versicolor', 'virginica',\n",
       "       'versicolor', 'virginica', 'versicolor', 'virginica', 'virginica',\n",
       "       'setosa', 'versicolor', 'setosa', 'versicolor', 'virginica',\n",
       "       'virginica', 'setosa', 'versicolor', 'virginica', 'versicolor'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred = model.predict(X_test)\n",
    "y_pred"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f540de3c",
   "metadata": {},
   "source": [
    "####評価データの予測確率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "aadd968f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9.99845068e-01, 1.54432218e-04, 4.99580208e-07],\n",
       "       [1.16351280e-03, 9.97668427e-01, 1.16806052e-03],\n",
       "       [2.39895684e-05, 9.94308872e-01, 5.66713798e-03],\n",
       "       [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n",
       "       [6.32669869e-04, 6.39808284e-04, 9.98727522e-01],\n",
       "       [2.22526484e-04, 9.99538736e-01, 2.38737663e-04],\n",
       "       [7.59269593e-05, 1.22460832e-03, 9.98699465e-01],\n",
       "       [9.80755450e-01, 1.91575712e-02, 8.69787297e-05],\n",
       "       [9.99967609e-01, 3.18858996e-05, 5.04938586e-07],\n",
       "       [1.38078909e-04, 6.40124928e-04, 9.99221796e-01],\n",
       "       [7.05289192e-06, 9.99906324e-01, 8.66228872e-05],\n",
       "       [9.99951633e-01, 4.78728229e-05, 4.93949848e-07],\n",
       "       [1.74173849e-06, 1.98199322e-05, 9.99978438e-01],\n",
       "       [8.24526593e-05, 9.99751836e-01, 1.65711714e-04],\n",
       "       [4.59102552e-06, 9.99820763e-01, 1.74645565e-04],\n",
       "       [9.99967606e-01, 3.18858995e-05, 5.07881523e-07],\n",
       "       [4.56149536e-06, 9.99938450e-01, 5.69887569e-05],\n",
       "       [4.53029899e-05, 9.98287460e-01, 1.66723741e-03],\n",
       "       [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n",
       "       [9.99967263e-01, 3.22367235e-05, 5.00211245e-07],\n",
       "       [8.46608214e-06, 9.99875679e-01, 1.15855232e-04],\n",
       "       [5.05433898e-05, 9.98045139e-01, 1.90431792e-03],\n",
       "       [2.29919926e-06, 2.63612702e-05, 9.99971340e-01],\n",
       "       [9.99967263e-01, 3.22367235e-05, 5.00211245e-07],\n",
       "       [1.37143803e-04, 6.40125527e-04, 9.99222731e-01],\n",
       "       [7.99860194e-06, 9.99751919e-01, 2.40082849e-04],\n",
       "       [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n",
       "       [9.99967262e-01, 3.22367235e-05, 5.00800410e-07],\n",
       "       [2.81104395e-05, 9.95825634e-01, 4.14625563e-03],\n",
       "       [5.12052964e-06, 7.39520948e-05, 9.99920927e-01],\n",
       "       [2.52164108e-05, 9.95893607e-01, 4.08117635e-03],\n",
       "       [1.74165945e-06, 1.98199322e-05, 9.99978438e-01],\n",
       "       [6.11465116e-06, 9.99916377e-01, 7.75087568e-05],\n",
       "       [1.73615648e-06, 1.98199323e-05, 9.99978444e-01],\n",
       "       [5.83830969e-06, 1.77964109e-04, 9.99816198e-01],\n",
       "       [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n",
       "       [4.53392657e-06, 9.99937967e-01, 5.74991400e-05],\n",
       "       [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n",
       "       [2.56668004e-05, 9.93427955e-01, 6.54637770e-03],\n",
       "       [3.82630086e-06, 5.56021727e-05, 9.99940572e-01],\n",
       "       [1.74178131e-06, 1.98199322e-05, 9.99978438e-01],\n",
       "       [9.99967262e-01, 3.22367235e-05, 5.00800410e-07],\n",
       "       [2.38760325e-04, 9.63842806e-01, 3.59184340e-02],\n",
       "       [1.72547641e-06, 1.98199326e-05, 9.99978455e-01],\n",
       "       [2.49292497e-05, 9.94982107e-01, 4.99296385e-03]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred_prob = model.predict_proba(X_test)\n",
    "y_pred_prob"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5cc91ee",
   "metadata": {},
   "source": [
    "####混合行列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "1e207007",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[14,  0,  0],\n",
       "       [ 0, 17,  1],\n",
       "       [ 0,  1, 12]], dtype=int64)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cm = confusion_matrix(y_test, y_pred)\n",
    "cm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "797d26ed",
   "metadata": {},
   "source": [
    "####精度、正解率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "dfa71f9a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9555555555555556"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ac = accuracy_score(y_test, y_pred)\n",
    "ac"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f21c8d60",
   "metadata": {},
   "source": [
    "####性能評価レポート"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e2dd86fb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "      setosa       1.00      1.00      1.00        14\n",
      "  versicolor       0.94      0.94      0.94        18\n",
      "   virginica       0.92      0.92      0.92        13\n",
      "\n",
      "    accuracy                           0.96        45\n",
      "   macro avg       0.96      0.96      0.96        45\n",
      "weighted avg       0.96      0.96      0.96        45\n",
      "\n"
     ]
    }
   ],
   "source": [
    "clrp = classification_report(y_test, y_pred)\n",
    "print(clrp)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "37800f6c",
   "metadata": {},
   "source": [
    "####pmmlでのモデル作成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "05a7405f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PMMLPipeline(steps=[('classifier', LGBMClassifier())])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = PMMLPipeline(\n",
    "    [\n",
    "        (\n",
    "            \"classifier\",\n",
    "            lgb.LGBMClassifier(),\n",
    "        )\n",
    "    ]\n",
    ")\n",
    "\n",
    "clf.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c07dc4f3",
   "metadata": {},
   "source": [
    "####pmmlファイルのオフロード"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "46e95e30",
   "metadata": {},
   "outputs": [],
   "source": [
    "sklearn2pmml(clf, \"lgbm_model.pmml\", with_repr=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}