Skip to content

Instantly share code, notes, and snippets.

@wstcpyt
Last active April 7, 2020 00:43
Show Gist options
  • Save wstcpyt/be58eb410dbbb965589709250583123d to your computer and use it in GitHub Desktop.
Save wstcpyt/be58eb410dbbb965589709250583123d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn import datasets\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"iris = datasets.load_iris() # load iris dataset\n",
"x = iris.data\n",
"y = iris.target"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"d = [{\"sepal_length\":row[0], \"sepal_width\":row[1], \"petal_length\":row[2], \"petal_width\":row[3]} for row in x]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(d) # construct dataframe"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df[\"types\"] = y # assign types"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"df = df.sample(frac=1.0) # random shuffle rows"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length</th>\n",
" <th>sepal_width</th>\n",
" <th>petal_length</th>\n",
" <th>petal_width</th>\n",
" <th>types</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>4.8</td>\n",
" <td>1.8</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>5.5</td>\n",
" <td>2.3</td>\n",
" <td>4.0</td>\n",
" <td>1.3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>5.0</td>\n",
" <td>3.5</td>\n",
" <td>1.6</td>\n",
" <td>0.6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>6.2</td>\n",
" <td>3.4</td>\n",
" <td>5.4</td>\n",
" <td>2.3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>5.6</td>\n",
" <td>3.0</td>\n",
" <td>4.1</td>\n",
" <td>1.3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width types\n",
"138 6.0 3.0 4.8 1.8 2\n",
"53 5.5 2.3 4.0 1.3 1\n",
"43 5.0 3.5 1.6 0.6 0\n",
"148 6.2 3.4 5.4 2.3 2\n",
"88 5.6 3.0 4.1 1.3 1"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# train test split, ratio = 0.8\n",
"features = df[[\"sepal_length\",\"sepal_width\",\"petal_length\",\"petal_width\"]]\n",
"types = df[\"types\"]\n",
"train_features, test_features, train_types, test_types = train_test_split(features,types,train_size=0.8, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#scikit learn decision tree model trainig \n",
"from sklearn import tree\n",
"clf = tree.DecisionTreeClassifier()\n",
"clf = clf.fit(train_features, train_types)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"prediction = clf.predict(test_features)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" type0 1.00 1.00 1.00 7\n",
" type1 0.92 1.00 0.96 11\n",
" type2 1.00 0.92 0.96 12\n",
"\n",
" accuracy 0.97 30\n",
" macro avg 0.97 0.97 0.97 30\n",
"weighted avg 0.97 0.97 0.97 30\n",
"\n"
]
}
],
"source": [
"# evaluation for multi class classification\n",
"from sklearn.metrics import classification_report\n",
"print(classification_report(test_types, prediction, target_names=[\"type0\",\"type1\",\"type2\"]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment