Created
June 25, 2022 07:33
-
-
Save yssymmt/4d0e4e683a6c7e5174262f0590638da1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "a677a16f", | |
"metadata": {}, | |
"source": [ | |
"#### PMMLモジュールのインストール" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"id": "c0e876b0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Collecting package metadata (current_repodata.json): ...working... done\n", | |
"Solving environment: ...working... done\n", | |
"\n", | |
"## Package Plan ##\n", | |
"\n", | |
" environment location: C:\\Users\\yourdirectory\\Anaconda3\n", | |
"\n", | |
" added / updated specs:\n", | |
" - sklearn2pmml\n", | |
"\n", | |
"\n", | |
"The following packages will be downloaded:\n", | |
"\n", | |
" package | build\n", | |
" ---------------------------|-----------------\n", | |
" conda-4.13.0 | py39hcbf5309_1 1.0 MB conda-forge\n", | |
" python_abi-3.9 | 2_cp39 4 KB conda-forge\n", | |
" sklearn-pandas-2.2.0 | pyhd8ed1ab_0 13 KB conda-forge\n", | |
" sklearn2pmml-0.81.0 | pyhd8ed1ab_0 5.9 MB conda-forge\n", | |
" ------------------------------------------------------------\n", | |
" Total: 6.9 MB\n", | |
"\n", | |
"The following NEW packages will be INSTALLED:\n", | |
"\n", | |
" python_abi conda-forge/win-64::python_abi-3.9-2_cp39\n", | |
" sklearn-pandas conda-forge/noarch::sklearn-pandas-2.2.0-pyhd8ed1ab_0\n", | |
" sklearn2pmml conda-forge/noarch::sklearn2pmml-0.81.0-pyhd8ed1ab_0\n", | |
"\n", | |
"The following packages will be UPDATED:\n", | |
"\n", | |
" conda pkgs/main::conda-4.13.0-py39haa95532_0 --> conda-forge::conda-4.13.0-py39hcbf5309_1\n", | |
"\n", | |
"\n", | |
"\n", | |
"Downloading and Extracting Packages\n", | |
"\n", | |
"python_abi-3.9 | 4 KB | | 0% \n", | |
"python_abi-3.9 | 4 KB | ########## | 100% \n", | |
"python_abi-3.9 | 4 KB | ########## | 100% \n", | |
"\n", | |
"sklearn-pandas-2.2.0 | 13 KB | | 0% \n", | |
"sklearn-pandas-2.2.0 | 13 KB | ########## | 100% \n", | |
"sklearn-pandas-2.2.0 | 13 KB | ########## | 100% \n", | |
"\n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | | 0% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | | 0% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | 1 | 2% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | 2 | 3% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | 6 | 6% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | # | 11% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | #7 | 17% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | ##6 | 27% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | ###9 | 40% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | ###### | 61% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | ########8 | 89% \n", | |
"sklearn2pmml-0.81.0 | 5.9 MB | ########## | 100% \n", | |
"\n", | |
"conda-4.13.0 | 1.0 MB | | 0% \n", | |
"conda-4.13.0 | 1.0 MB | ########## | 100% \n", | |
"conda-4.13.0 | 1.0 MB | ########## | 100% \n", | |
"Preparing transaction: ...working... done\n", | |
"Verifying transaction: ...working... done\n", | |
"Executing transaction: ...working... done\n", | |
"\n", | |
"Note: you may need to restart the kernel to use updated packages.\n" | |
] | |
} | |
], | |
"source": [ | |
"conda install -c conda-forge sklearn2pmml" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "18065645", | |
"metadata": {}, | |
"source": [ | |
"#### ライブラリの読み込み" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "a5ace988", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"from getpass import getpass\n", | |
"from urllib.parse import quote\n", | |
"\n", | |
"import lightgbm as lgb\n", | |
"import pandas as pd\n", | |
"import teradatasql\n", | |
"import teradatasqlalchemy\n", | |
"from sklearn import datasets\n", | |
"from sklearn.metrics import accuracy_score\n", | |
"from sklearn.metrics import classification_report\n", | |
"from sklearn.metrics import confusion_matrix\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"from sklearn2pmml import sklearn2pmml\n", | |
"from sklearn2pmml.pipeline import PMMLPipeline\n", | |
"from sqlalchemy import create_engine\n", | |
"from sqlalchemy import text" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3ecde84a", | |
"metadata": {}, | |
"source": [ | |
"#### データフレームに変換" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "39c12785", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>targeto</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width targeto\n", | |
"0 5.1 3.5 1.4 0.2 setosa\n", | |
"1 4.9 3.0 1.4 0.2 setosa\n", | |
"2 4.7 3.2 1.3 0.2 setosa\n", | |
"3 4.6 3.1 1.5 0.2 setosa\n", | |
"4 5.0 3.6 1.4 0.2 setosa" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"iris = datasets.load_iris()\n", | |
"df = pd.DataFrame(iris.data, columns=iris.feature_names)\n", | |
"df['target'] = iris.target_names[iris.target]\n", | |
"df.columns = ['sepal_length','sepal_width','petal_length','petal_width','targeto']\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "21ceeefc", | |
"metadata": {}, | |
"source": [ | |
"#### ユニーク番号を付与、列順入替" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "ac6d8cce", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>serial_num</th>\n", | |
" <th>targeto</th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>setosa</td>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>setosa</td>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>setosa</td>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>setosa</td>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" serial_num targeto sepal_length sepal_width petal_length petal_width\n", | |
"0 1 setosa 5.1 3.5 1.4 0.2\n", | |
"1 2 setosa 4.9 3.0 1.4 0.2\n", | |
"2 3 setosa 4.7 3.2 1.3 0.2\n", | |
"3 4 setosa 4.6 3.1 1.5 0.2\n", | |
"4 5 setosa 5.0 3.6 1.4 0.2" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"serial_num = pd.RangeIndex(start=1, stop=len(df.index) + 1, step=1)\n", | |
"df['serial_num'] = serial_num\n", | |
"df = df.loc[:,['serial_num','targeto','sepal_length','sepal_width','petal_length','petal_width']]\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d03e0a8a", | |
"metadata": {}, | |
"source": [ | |
"#### Teradataへの接続、sqlalchemy エンジンを作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "0e5e2ee2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"host = \"123.456.789.0\"\n", | |
"user = \"jumbo\"\n", | |
"password = \"mambo\"\n", | |
"connstr = \"teradatasql://{user}:{password}@{host}\".format(host=host, user=user, password=password)\n", | |
"engine = create_engine(connstr)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "4ba3a7f2", | |
"metadata": {}, | |
"source": [ | |
"#### 空テーブル作成、元データ用" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "b2a6ebb2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" x1 = pd.read_sql(\"\"\"\n", | |
" create multiset table jumbo.lgb01_iris (\n", | |
" serial_num integer, \n", | |
" targeto varchar(10) character set unicode, \n", | |
" sepal_length float, \n", | |
" sepal_width float, \n", | |
" petal_length float, \n", | |
" petal_width float \n", | |
" ) primary index (serial_num) \n", | |
" \"\"\", conn)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ae1aeddf", | |
"metadata": {}, | |
"source": [ | |
"#### 元データの格納" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "59fbe566", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.to_sql('lgb01_iris',engine,if_exists='append',index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "4ee90be4", | |
"metadata": {}, | |
"source": [ | |
"#### 格納を確認" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "12ca1153", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>serial_num</th>\n", | |
" <th>targeto</th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>setosa</td>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>setosa</td>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>setosa</td>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>setosa</td>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>setosa</td>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>146</td>\n", | |
" <td>virginica</td>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>147</td>\n", | |
" <td>virginica</td>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>148</td>\n", | |
" <td>virginica</td>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>149</td>\n", | |
" <td>virginica</td>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>150</td>\n", | |
" <td>virginica</td>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 6 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" serial_num targeto sepal_length sepal_width petal_length \\\n", | |
"0 1 setosa 5.1 3.5 1.4 \n", | |
"1 2 setosa 4.9 3.0 1.4 \n", | |
"2 3 setosa 4.7 3.2 1.3 \n", | |
"3 4 setosa 4.6 3.1 1.5 \n", | |
"4 5 setosa 5.0 3.6 1.4 \n", | |
".. ... ... ... ... ... \n", | |
"145 146 virginica 6.7 3.0 5.2 \n", | |
"146 147 virginica 6.3 2.5 5.0 \n", | |
"147 148 virginica 6.5 3.0 5.2 \n", | |
"148 149 virginica 6.2 3.4 5.4 \n", | |
"149 150 virginica 5.9 3.0 5.1 \n", | |
"\n", | |
" petal_width \n", | |
"0 0.2 \n", | |
"1 0.2 \n", | |
"2 0.2 \n", | |
"3 0.2 \n", | |
"4 0.2 \n", | |
".. ... \n", | |
"145 2.3 \n", | |
"146 1.9 \n", | |
"147 2.0 \n", | |
"148 2.3 \n", | |
"149 1.8 \n", | |
"\n", | |
"[150 rows x 6 columns]" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with engine.connect() as conn:\n", | |
" x2 = pd.read_sql(\"\"\"\n", | |
" select * from jumbo.lgb01_iris order by 1 \n", | |
" \"\"\", conn)\n", | |
"x2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ded7ca2b", | |
"metadata": {}, | |
"source": [ | |
"#### 説明変数と結果変数" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "b2121799", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width\n", | |
"0 5.1 3.5 1.4 0.2\n", | |
"1 4.9 3.0 1.4 0.2\n", | |
"2 4.7 3.2 1.3 0.2\n", | |
"3 4.6 3.1 1.5 0.2\n", | |
"4 5.0 3.6 1.4 0.2" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"X = x2.drop(['serial_num','targeto'], axis=1)\n", | |
"y = x2['targeto'].values\n", | |
"X.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3ce30c3e", | |
"metadata": {}, | |
"source": [ | |
"#### 学習用、評価用への分割" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "67dbcc4a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e6f92255", | |
"metadata": {}, | |
"source": [ | |
"#### 学習" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "d0df4d2c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"LGBMClassifier()" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model = lgb.LGBMClassifier() \n", | |
"model.fit(X_train, y_train) " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "cf6270aa", | |
"metadata": {}, | |
"source": [ | |
"#### 評価データの予測" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "752d7448", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['setosa', 'versicolor', 'versicolor', 'setosa', 'virginica',\n", | |
" 'versicolor', 'virginica', 'setosa', 'setosa', 'virginica',\n", | |
" 'versicolor', 'setosa', 'virginica', 'versicolor', 'versicolor',\n", | |
" 'setosa', 'versicolor', 'versicolor', 'setosa', 'setosa',\n", | |
" 'versicolor', 'versicolor', 'virginica', 'setosa', 'virginica',\n", | |
" 'versicolor', 'setosa', 'setosa', 'versicolor', 'virginica',\n", | |
" 'versicolor', 'virginica', 'versicolor', 'virginica', 'virginica',\n", | |
" 'setosa', 'versicolor', 'setosa', 'versicolor', 'virginica',\n", | |
" 'virginica', 'setosa', 'versicolor', 'virginica', 'versicolor'],\n", | |
" dtype=object)" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_pred = model.predict(X_test)\n", | |
"y_pred" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f540de3c", | |
"metadata": {}, | |
"source": [ | |
"#### 評価データの予測確率" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "aadd968f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[9.99845068e-01, 1.54432218e-04, 4.99580208e-07],\n", | |
" [1.16351280e-03, 9.97668427e-01, 1.16806052e-03],\n", | |
" [2.39895684e-05, 9.94308872e-01, 5.66713798e-03],\n", | |
" [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n", | |
" [6.32669869e-04, 6.39808284e-04, 9.98727522e-01],\n", | |
" [2.22526484e-04, 9.99538736e-01, 2.38737663e-04],\n", | |
" [7.59269593e-05, 1.22460832e-03, 9.98699465e-01],\n", | |
" [9.80755450e-01, 1.91575712e-02, 8.69787297e-05],\n", | |
" [9.99967609e-01, 3.18858996e-05, 5.04938586e-07],\n", | |
" [1.38078909e-04, 6.40124928e-04, 9.99221796e-01],\n", | |
" [7.05289192e-06, 9.99906324e-01, 8.66228872e-05],\n", | |
" [9.99951633e-01, 4.78728229e-05, 4.93949848e-07],\n", | |
" [1.74173849e-06, 1.98199322e-05, 9.99978438e-01],\n", | |
" [8.24526593e-05, 9.99751836e-01, 1.65711714e-04],\n", | |
" [4.59102552e-06, 9.99820763e-01, 1.74645565e-04],\n", | |
" [9.99967606e-01, 3.18858995e-05, 5.07881523e-07],\n", | |
" [4.56149536e-06, 9.99938450e-01, 5.69887569e-05],\n", | |
" [4.53029899e-05, 9.98287460e-01, 1.66723741e-03],\n", | |
" [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n", | |
" [9.99967263e-01, 3.22367235e-05, 5.00211245e-07],\n", | |
" [8.46608214e-06, 9.99875679e-01, 1.15855232e-04],\n", | |
" [5.05433898e-05, 9.98045139e-01, 1.90431792e-03],\n", | |
" [2.29919926e-06, 2.63612702e-05, 9.99971340e-01],\n", | |
" [9.99967263e-01, 3.22367235e-05, 5.00211245e-07],\n", | |
" [1.37143803e-04, 6.40125527e-04, 9.99222731e-01],\n", | |
" [7.99860194e-06, 9.99751919e-01, 2.40082849e-04],\n", | |
" [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n", | |
" [9.99967262e-01, 3.22367235e-05, 5.00800410e-07],\n", | |
" [2.81104395e-05, 9.95825634e-01, 4.14625563e-03],\n", | |
" [5.12052964e-06, 7.39520948e-05, 9.99920927e-01],\n", | |
" [2.52164108e-05, 9.95893607e-01, 4.08117635e-03],\n", | |
" [1.74165945e-06, 1.98199322e-05, 9.99978438e-01],\n", | |
" [6.11465116e-06, 9.99916377e-01, 7.75087568e-05],\n", | |
" [1.73615648e-06, 1.98199323e-05, 9.99978444e-01],\n", | |
" [5.83830969e-06, 1.77964109e-04, 9.99816198e-01],\n", | |
" [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n", | |
" [4.53392657e-06, 9.99937967e-01, 5.74991400e-05],\n", | |
" [9.99951626e-01, 4.78728226e-05, 5.00792579e-07],\n", | |
" [2.56668004e-05, 9.93427955e-01, 6.54637770e-03],\n", | |
" [3.82630086e-06, 5.56021727e-05, 9.99940572e-01],\n", | |
" [1.74178131e-06, 1.98199322e-05, 9.99978438e-01],\n", | |
" [9.99967262e-01, 3.22367235e-05, 5.00800410e-07],\n", | |
" [2.38760325e-04, 9.63842806e-01, 3.59184340e-02],\n", | |
" [1.72547641e-06, 1.98199326e-05, 9.99978455e-01],\n", | |
" [2.49292497e-05, 9.94982107e-01, 4.99296385e-03]])" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y_pred_prob = model.predict_proba(X_test)\n", | |
"y_pred_prob" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d5cc91ee", | |
"metadata": {}, | |
"source": [ | |
"#### 混同行列" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "1e207007", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[14, 0, 0],\n", | |
" [ 0, 17, 1],\n", | |
" [ 0, 1, 12]], dtype=int64)" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cm = confusion_matrix(y_test, y_pred)\n", | |
"cm" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "797d26ed", | |
"metadata": {}, | |
"source": [ | |
"#### 精度、正解率" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "dfa71f9a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.9555555555555556" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ac = accuracy_score(y_test, y_pred)\n", | |
"ac" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f21c8d60", | |
"metadata": {}, | |
"source": [ | |
"#### 性能評価レポート" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "e2dd86fb", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" precision recall f1-score support\n", | |
"\n", | |
" setosa 1.00 1.00 1.00 14\n", | |
" versicolor 0.94 0.94 0.94 18\n", | |
" virginica 0.92 0.92 0.92 13\n", | |
"\n", | |
" accuracy 0.96 45\n", | |
" macro avg 0.96 0.96 0.96 45\n", | |
"weighted avg 0.96 0.96 0.96 45\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"clrp = classification_report(y_test, y_pred)\n", | |
"print(clrp)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "37800f6c", | |
"metadata": {}, | |
"source": [ | |
"#### PMMLでのモデル作成" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "05a7405f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"PMMLPipeline(steps=[('classifier', LGBMClassifier())])" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"clf = PMMLPipeline(\n", | |
" [\n", | |
" (\n", | |
" \"classifier\",\n", | |
" lgb.LGBMClassifier(),\n", | |
" )\n", | |
" ]\n", | |
")\n", | |
"\n", | |
"clf.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "c07dc4f3", | |
"metadata": {}, | |
"source": [ | |
"#### PMMLファイルのオフロード" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "46e95e30", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sklearn2pmml(clf, \"lgbm_model.pmml\", with_repr=True)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment