Skip to content

Instantly share code, notes, and snippets.

@mylamour mylamour/feature_engine.ipynb Secret
Created Apr 24, 2018

Embed
What would you like to do?
[特征工程教程] #python #jupynotebook
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Based on the tutorial at https://www.cnblogs.com/jasonfreak/p/5448385.html\n",
"# Notes and tutorial on feature engineering with scikit-learn\n",
"\n",
"%matplotlib inline\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.datasets import load_iris"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 5.1 3.5 1.4 0.2]\n",
" [ 4.9 3. 1.4 0.2]\n",
" [ 4.7 3.2 1.3 0.2]\n",
" [ 4.6 3.1 1.5 0.2]\n",
" [ 5. 3.6 1.4 0.2]\n",
" [ 5.4 3.9 1.7 0.4]\n",
" [ 4.6 3.4 1.4 0.3]\n",
" [ 5. 3.4 1.5 0.2]\n",
" [ 4.4 2.9 1.4 0.2]\n",
" [ 4.9 3.1 1.5 0.1]\n",
" [ 5.4 3.7 1.5 0.2]\n",
" [ 4.8 3.4 1.6 0.2]\n",
" [ 4.8 3. 1.4 0.1]\n",
" [ 4.3 3. 1.1 0.1]\n",
" [ 5.8 4. 1.2 0.2]\n",
" [ 5.7 4.4 1.5 0.4]\n",
" [ 5.4 3.9 1.3 0.4]\n",
" [ 5.1 3.5 1.4 0.3]\n",
" [ 5.7 3.8 1.7 0.3]\n",
" [ 5.1 3.8 1.5 0.3]\n",
" [ 5.4 3.4 1.7 0.2]\n",
" [ 5.1 3.7 1.5 0.4]\n",
" [ 4.6 3.6 1. 0.2]\n",
" [ 5.1 3.3 1.7 0.5]\n",
" [ 4.8 3.4 1.9 0.2]\n",
" [ 5. 3. 1.6 0.2]\n",
" [ 5. 3.4 1.6 0.4]\n",
" [ 5.2 3.5 1.5 0.2]\n",
" [ 5.2 3.4 1.4 0.2]\n",
" [ 4.7 3.2 1.6 0.2]\n",
" [ 4.8 3.1 1.6 0.2]\n",
" [ 5.4 3.4 1.5 0.4]\n",
" [ 5.2 4.1 1.5 0.1]\n",
" [ 5.5 4.2 1.4 0.2]\n",
" [ 4.9 3.1 1.5 0.1]\n",
" [ 5. 3.2 1.2 0.2]\n",
" [ 5.5 3.5 1.3 0.2]\n",
" [ 4.9 3.1 1.5 0.1]\n",
" [ 4.4 3. 1.3 0.2]\n",
" [ 5.1 3.4 1.5 0.2]\n",
" [ 5. 3.5 1.3 0.3]\n",
" [ 4.5 2.3 1.3 0.3]\n",
" [ 4.4 3.2 1.3 0.2]\n",
" [ 5. 3.5 1.6 0.6]\n",
" [ 5.1 3.8 1.9 0.4]\n",
" [ 4.8 3. 1.4 0.3]\n",
" [ 5.1 3.8 1.6 0.2]\n",
" [ 4.6 3.2 1.4 0.2]\n",
" [ 5.3 3.7 1.5 0.2]\n",
" [ 5. 3.3 1.4 0.2]\n",
" [ 7. 3.2 4.7 1.4]\n",
" [ 6.4 3.2 4.5 1.5]\n",
" [ 6.9 3.1 4.9 1.5]\n",
" [ 5.5 2.3 4. 1.3]\n",
" [ 6.5 2.8 4.6 1.5]\n",
" [ 5.7 2.8 4.5 1.3]\n",
" [ 6.3 3.3 4.7 1.6]\n",
" [ 4.9 2.4 3.3 1. ]\n",
" [ 6.6 2.9 4.6 1.3]\n",
" [ 5.2 2.7 3.9 1.4]\n",
" [ 5. 2. 3.5 1. ]\n",
" [ 5.9 3. 4.2 1.5]\n",
" [ 6. 2.2 4. 1. ]\n",
" [ 6.1 2.9 4.7 1.4]\n",
" [ 5.6 2.9 3.6 1.3]\n",
" [ 6.7 3.1 4.4 1.4]\n",
" [ 5.6 3. 4.5 1.5]\n",
" [ 5.8 2.7 4.1 1. ]\n",
" [ 6.2 2.2 4.5 1.5]\n",
" [ 5.6 2.5 3.9 1.1]\n",
" [ 5.9 3.2 4.8 1.8]\n",
" [ 6.1 2.8 4. 1.3]\n",
" [ 6.3 2.5 4.9 1.5]\n",
" [ 6.1 2.8 4.7 1.2]\n",
" [ 6.4 2.9 4.3 1.3]\n",
" [ 6.6 3. 4.4 1.4]\n",
" [ 6.8 2.8 4.8 1.4]\n",
" [ 6.7 3. 5. 1.7]\n",
" [ 6. 2.9 4.5 1.5]\n",
" [ 5.7 2.6 3.5 1. ]\n",
" [ 5.5 2.4 3.8 1.1]\n",
" [ 5.5 2.4 3.7 1. ]\n",
" [ 5.8 2.7 3.9 1.2]\n",
" [ 6. 2.7 5.1 1.6]\n",
" [ 5.4 3. 4.5 1.5]\n",
" [ 6. 3.4 4.5 1.6]\n",
" [ 6.7 3.1 4.7 1.5]\n",
" [ 6.3 2.3 4.4 1.3]\n",
" [ 5.6 3. 4.1 1.3]\n",
" [ 5.5 2.5 4. 1.3]\n",
" [ 5.5 2.6 4.4 1.2]\n",
" [ 6.1 3. 4.6 1.4]\n",
" [ 5.8 2.6 4. 1.2]\n",
" [ 5. 2.3 3.3 1. ]\n",
" [ 5.6 2.7 4.2 1.3]\n",
" [ 5.7 3. 4.2 1.2]\n",
" [ 5.7 2.9 4.2 1.3]\n",
" [ 6.2 2.9 4.3 1.3]\n",
" [ 5.1 2.5 3. 1.1]\n",
" [ 5.7 2.8 4.1 1.3]\n",
" [ 6.3 3.3 6. 2.5]\n",
" [ 5.8 2.7 5.1 1.9]\n",
" [ 7.1 3. 5.9 2.1]\n",
" [ 6.3 2.9 5.6 1.8]\n",
" [ 6.5 3. 5.8 2.2]\n",
" [ 7.6 3. 6.6 2.1]\n",
" [ 4.9 2.5 4.5 1.7]\n",
" [ 7.3 2.9 6.3 1.8]\n",
" [ 6.7 2.5 5.8 1.8]\n",
" [ 7.2 3.6 6.1 2.5]\n",
" [ 6.5 3.2 5.1 2. ]\n",
" [ 6.4 2.7 5.3 1.9]\n",
" [ 6.8 3. 5.5 2.1]\n",
" [ 5.7 2.5 5. 2. ]\n",
" [ 5.8 2.8 5.1 2.4]\n",
" [ 6.4 3.2 5.3 2.3]\n",
" [ 6.5 3. 5.5 1.8]\n",
" [ 7.7 3.8 6.7 2.2]\n",
" [ 7.7 2.6 6.9 2.3]\n",
" [ 6. 2.2 5. 1.5]\n",
" [ 6.9 3.2 5.7 2.3]\n",
" [ 5.6 2.8 4.9 2. ]\n",
" [ 7.7 2.8 6.7 2. ]\n",
" [ 6.3 2.7 4.9 1.8]\n",
" [ 6.7 3.3 5.7 2.1]\n",
" [ 7.2 3.2 6. 1.8]\n",
" [ 6.2 2.8 4.8 1.8]\n",
" [ 6.1 3. 4.9 1.8]\n",
" [ 6.4 2.8 5.6 2.1]\n",
" [ 7.2 3. 5.8 1.6]\n",
" [ 7.4 2.8 6.1 1.9]\n",
" [ 7.9 3.8 6.4 2. ]\n",
" [ 6.4 2.8 5.6 2.2]\n",
" [ 6.3 2.8 5.1 1.5]\n",
" [ 6.1 2.6 5.6 1.4]\n",
" [ 7.7 3. 6.1 2.3]\n",
" [ 6.3 3.4 5.6 2.4]\n",
" [ 6.4 3.1 5.5 1.8]\n",
" [ 6. 3. 4.8 1.8]\n",
" [ 6.9 3.1 5.4 2.1]\n",
" [ 6.7 3.1 5.6 2.4]\n",
" [ 6.9 3.1 5.1 2.3]\n",
" [ 5.8 2.7 5.1 1.9]\n",
" [ 6.8 3.2 5.9 2.3]\n",
" [ 6.7 3.3 5.7 2.5]\n",
" [ 6.7 3. 5.2 2.3]\n",
" [ 6.3 2.5 5. 1.9]\n",
" [ 6.5 3. 5.2 2. ]\n",
" [ 6.2 3.4 5.4 2.3]\n",
" [ 5.9 3. 5.1 1.8]]\n",
"[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
" 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
" 2 2]\n"
]
}
],
"source": [
"iris = load_iris()\n",
"print(iris.data)\n",
"print(iris.target)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 数据预处理"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ -9.00681170e-01, 1.03205722e+00, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -1.14301691e+00, -1.24957601e-01, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -1.38535265e+00, 3.37848329e-01, -1.39813811e+00,\n",
" -1.31297673e+00],\n",
" [ -1.50652052e+00, 1.06445364e-01, -1.28440670e+00,\n",
" -1.31297673e+00],\n",
" [ -1.02184904e+00, 1.26346019e+00, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -5.37177559e-01, 1.95766909e+00, -1.17067529e+00,\n",
" -1.05003079e+00],\n",
" [ -1.50652052e+00, 8.00654259e-01, -1.34127240e+00,\n",
" -1.18150376e+00],\n",
" [ -1.02184904e+00, 8.00654259e-01, -1.28440670e+00,\n",
" -1.31297673e+00],\n",
" [ -1.74885626e+00, -3.56360566e-01, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -1.14301691e+00, 1.06445364e-01, -1.28440670e+00,\n",
" -1.44444970e+00],\n",
" [ -5.37177559e-01, 1.49486315e+00, -1.28440670e+00,\n",
" -1.31297673e+00],\n",
" [ -1.26418478e+00, 8.00654259e-01, -1.22754100e+00,\n",
" -1.31297673e+00],\n",
" [ -1.26418478e+00, -1.24957601e-01, -1.34127240e+00,\n",
" -1.44444970e+00],\n",
" [ -1.87002413e+00, -1.24957601e-01, -1.51186952e+00,\n",
" -1.44444970e+00],\n",
" [ -5.25060772e-02, 2.18907205e+00, -1.45500381e+00,\n",
" -1.31297673e+00],\n",
" [ -1.73673948e-01, 3.11468391e+00, -1.28440670e+00,\n",
" -1.05003079e+00],\n",
" [ -5.37177559e-01, 1.95766909e+00, -1.39813811e+00,\n",
" -1.05003079e+00],\n",
" [ -9.00681170e-01, 1.03205722e+00, -1.34127240e+00,\n",
" -1.18150376e+00],\n",
" [ -1.73673948e-01, 1.72626612e+00, -1.17067529e+00,\n",
" -1.18150376e+00],\n",
" [ -9.00681170e-01, 1.72626612e+00, -1.28440670e+00,\n",
" -1.18150376e+00],\n",
" [ -5.37177559e-01, 8.00654259e-01, -1.17067529e+00,\n",
" -1.31297673e+00],\n",
" [ -9.00681170e-01, 1.49486315e+00, -1.28440670e+00,\n",
" -1.05003079e+00],\n",
" [ -1.50652052e+00, 1.26346019e+00, -1.56873522e+00,\n",
" -1.31297673e+00],\n",
" [ -9.00681170e-01, 5.69251294e-01, -1.17067529e+00,\n",
" -9.18557817e-01],\n",
" [ -1.26418478e+00, 8.00654259e-01, -1.05694388e+00,\n",
" -1.31297673e+00],\n",
" [ -1.02184904e+00, -1.24957601e-01, -1.22754100e+00,\n",
" -1.31297673e+00],\n",
" [ -1.02184904e+00, 8.00654259e-01, -1.22754100e+00,\n",
" -1.05003079e+00],\n",
" [ -7.79513300e-01, 1.03205722e+00, -1.28440670e+00,\n",
" -1.31297673e+00],\n",
" [ -7.79513300e-01, 8.00654259e-01, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -1.38535265e+00, 3.37848329e-01, -1.22754100e+00,\n",
" -1.31297673e+00],\n",
" [ -1.26418478e+00, 1.06445364e-01, -1.22754100e+00,\n",
" -1.31297673e+00],\n",
" [ -5.37177559e-01, 8.00654259e-01, -1.28440670e+00,\n",
" -1.05003079e+00],\n",
" [ -7.79513300e-01, 2.42047502e+00, -1.28440670e+00,\n",
" -1.44444970e+00],\n",
" [ -4.16009689e-01, 2.65187798e+00, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -1.14301691e+00, 1.06445364e-01, -1.28440670e+00,\n",
" -1.44444970e+00],\n",
" [ -1.02184904e+00, 3.37848329e-01, -1.45500381e+00,\n",
" -1.31297673e+00],\n",
" [ -4.16009689e-01, 1.03205722e+00, -1.39813811e+00,\n",
" -1.31297673e+00],\n",
" [ -1.14301691e+00, 1.06445364e-01, -1.28440670e+00,\n",
" -1.44444970e+00],\n",
" [ -1.74885626e+00, -1.24957601e-01, -1.39813811e+00,\n",
" -1.31297673e+00],\n",
" [ -9.00681170e-01, 8.00654259e-01, -1.28440670e+00,\n",
" -1.31297673e+00],\n",
" [ -1.02184904e+00, 1.03205722e+00, -1.39813811e+00,\n",
" -1.18150376e+00],\n",
" [ -1.62768839e+00, -1.74477836e+00, -1.39813811e+00,\n",
" -1.18150376e+00],\n",
" [ -1.74885626e+00, 3.37848329e-01, -1.39813811e+00,\n",
" -1.31297673e+00],\n",
" [ -1.02184904e+00, 1.03205722e+00, -1.22754100e+00,\n",
" -7.87084847e-01],\n",
" [ -9.00681170e-01, 1.72626612e+00, -1.05694388e+00,\n",
" -1.05003079e+00],\n",
" [ -1.26418478e+00, -1.24957601e-01, -1.34127240e+00,\n",
" -1.18150376e+00],\n",
" [ -9.00681170e-01, 1.72626612e+00, -1.22754100e+00,\n",
" -1.31297673e+00],\n",
" [ -1.50652052e+00, 3.37848329e-01, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ -6.58345429e-01, 1.49486315e+00, -1.28440670e+00,\n",
" -1.31297673e+00],\n",
" [ -1.02184904e+00, 5.69251294e-01, -1.34127240e+00,\n",
" -1.31297673e+00],\n",
" [ 1.40150837e+00, 3.37848329e-01, 5.35295827e-01,\n",
" 2.64698913e-01],\n",
" [ 6.74501145e-01, 3.37848329e-01, 4.21564419e-01,\n",
" 3.96171883e-01],\n",
" [ 1.28034050e+00, 1.06445364e-01, 6.49027235e-01,\n",
" 3.96171883e-01],\n",
" [ -4.16009689e-01, -1.74477836e+00, 1.37235899e-01,\n",
" 1.33225943e-01],\n",
" [ 7.95669016e-01, -5.87763531e-01, 4.78430123e-01,\n",
" 3.96171883e-01],\n",
" [ -1.73673948e-01, -5.87763531e-01, 4.21564419e-01,\n",
" 1.33225943e-01],\n",
" [ 5.53333275e-01, 5.69251294e-01, 5.35295827e-01,\n",
" 5.27644853e-01],\n",
" [ -1.14301691e+00, -1.51337539e+00, -2.60824029e-01,\n",
" -2.61192967e-01],\n",
" [ 9.16836886e-01, -3.56360566e-01, 4.78430123e-01,\n",
" 1.33225943e-01],\n",
" [ -7.79513300e-01, -8.19166497e-01, 8.03701950e-02,\n",
" 2.64698913e-01],\n",
" [ -1.02184904e+00, -2.43898725e+00, -1.47092621e-01,\n",
" -2.61192967e-01],\n",
" [ 6.86617933e-02, -1.24957601e-01, 2.50967307e-01,\n",
" 3.96171883e-01],\n",
" [ 1.89829664e-01, -1.97618132e+00, 1.37235899e-01,\n",
" -2.61192967e-01],\n",
" [ 3.10997534e-01, -3.56360566e-01, 5.35295827e-01,\n",
" 2.64698913e-01],\n",
" [ -2.94841818e-01, -3.56360566e-01, -9.02269170e-02,\n",
" 1.33225943e-01],\n",
" [ 1.03800476e+00, 1.06445364e-01, 3.64698715e-01,\n",
" 2.64698913e-01],\n",
" [ -2.94841818e-01, -1.24957601e-01, 4.21564419e-01,\n",
" 3.96171883e-01],\n",
" [ -5.25060772e-02, -8.19166497e-01, 1.94101603e-01,\n",
" -2.61192967e-01],\n",
" [ 4.32165405e-01, -1.97618132e+00, 4.21564419e-01,\n",
" 3.96171883e-01],\n",
" [ -2.94841818e-01, -1.28197243e+00, 8.03701950e-02,\n",
" -1.29719997e-01],\n",
" [ 6.86617933e-02, 3.37848329e-01, 5.92161531e-01,\n",
" 7.90590793e-01],\n",
" [ 3.10997534e-01, -5.87763531e-01, 1.37235899e-01,\n",
" 1.33225943e-01],\n",
" [ 5.53333275e-01, -1.28197243e+00, 6.49027235e-01,\n",
" 3.96171883e-01],\n",
" [ 3.10997534e-01, -5.87763531e-01, 5.35295827e-01,\n",
" 1.75297293e-03],\n",
" [ 6.74501145e-01, -3.56360566e-01, 3.07833011e-01,\n",
" 1.33225943e-01],\n",
" [ 9.16836886e-01, -1.24957601e-01, 3.64698715e-01,\n",
" 2.64698913e-01],\n",
" [ 1.15917263e+00, -5.87763531e-01, 5.92161531e-01,\n",
" 2.64698913e-01],\n",
" [ 1.03800476e+00, -1.24957601e-01, 7.05892939e-01,\n",
" 6.59117823e-01],\n",
" [ 1.89829664e-01, -3.56360566e-01, 4.21564419e-01,\n",
" 3.96171883e-01],\n",
" [ -1.73673948e-01, -1.05056946e+00, -1.47092621e-01,\n",
" -2.61192967e-01],\n",
" [ -4.16009689e-01, -1.51337539e+00, 2.35044910e-02,\n",
" -1.29719997e-01],\n",
" [ -4.16009689e-01, -1.51337539e+00, -3.33612130e-02,\n",
" -2.61192967e-01],\n",
" [ -5.25060772e-02, -8.19166497e-01, 8.03701950e-02,\n",
" 1.75297293e-03],\n",
" [ 1.89829664e-01, -8.19166497e-01, 7.62758643e-01,\n",
" 5.27644853e-01],\n",
" [ -5.37177559e-01, -1.24957601e-01, 4.21564419e-01,\n",
" 3.96171883e-01],\n",
" [ 1.89829664e-01, 8.00654259e-01, 4.21564419e-01,\n",
" 5.27644853e-01],\n",
" [ 1.03800476e+00, 1.06445364e-01, 5.35295827e-01,\n",
" 3.96171883e-01],\n",
" [ 5.53333275e-01, -1.74477836e+00, 3.64698715e-01,\n",
" 1.33225943e-01],\n",
" [ -2.94841818e-01, -1.24957601e-01, 1.94101603e-01,\n",
" 1.33225943e-01],\n",
" [ -4.16009689e-01, -1.28197243e+00, 1.37235899e-01,\n",
" 1.33225943e-01],\n",
" [ -4.16009689e-01, -1.05056946e+00, 3.64698715e-01,\n",
" 1.75297293e-03],\n",
" [ 3.10997534e-01, -1.24957601e-01, 4.78430123e-01,\n",
" 2.64698913e-01],\n",
" [ -5.25060772e-02, -1.05056946e+00, 1.37235899e-01,\n",
" 1.75297293e-03],\n",
" [ -1.02184904e+00, -1.74477836e+00, -2.60824029e-01,\n",
" -2.61192967e-01],\n",
" [ -2.94841818e-01, -8.19166497e-01, 2.50967307e-01,\n",
" 1.33225943e-01],\n",
" [ -1.73673948e-01, -1.24957601e-01, 2.50967307e-01,\n",
" 1.75297293e-03],\n",
" [ -1.73673948e-01, -3.56360566e-01, 2.50967307e-01,\n",
" 1.33225943e-01],\n",
" [ 4.32165405e-01, -3.56360566e-01, 3.07833011e-01,\n",
" 1.33225943e-01],\n",
" [ -9.00681170e-01, -1.28197243e+00, -4.31421141e-01,\n",
" -1.29719997e-01],\n",
" [ -1.73673948e-01, -5.87763531e-01, 1.94101603e-01,\n",
" 1.33225943e-01],\n",
" [ 5.53333275e-01, 5.69251294e-01, 1.27454998e+00,\n",
" 1.71090158e+00],\n",
" [ -5.25060772e-02, -8.19166497e-01, 7.62758643e-01,\n",
" 9.22063763e-01],\n",
" [ 1.52267624e+00, -1.24957601e-01, 1.21768427e+00,\n",
" 1.18500970e+00],\n",
" [ 5.53333275e-01, -3.56360566e-01, 1.04708716e+00,\n",
" 7.90590793e-01],\n",
" [ 7.95669016e-01, -1.24957601e-01, 1.16081857e+00,\n",
" 1.31648267e+00],\n",
" [ 2.12851559e+00, -1.24957601e-01, 1.61574420e+00,\n",
" 1.18500970e+00],\n",
" [ -1.14301691e+00, -1.28197243e+00, 4.21564419e-01,\n",
" 6.59117823e-01],\n",
" [ 1.76501198e+00, -3.56360566e-01, 1.44514709e+00,\n",
" 7.90590793e-01],\n",
" [ 1.03800476e+00, -1.28197243e+00, 1.16081857e+00,\n",
" 7.90590793e-01],\n",
" [ 1.64384411e+00, 1.26346019e+00, 1.33141568e+00,\n",
" 1.71090158e+00],\n",
" [ 7.95669016e-01, 3.37848329e-01, 7.62758643e-01,\n",
" 1.05353673e+00],\n",
" [ 6.74501145e-01, -8.19166497e-01, 8.76490051e-01,\n",
" 9.22063763e-01],\n",
" [ 1.15917263e+00, -1.24957601e-01, 9.90221459e-01,\n",
" 1.18500970e+00],\n",
" [ -1.73673948e-01, -1.28197243e+00, 7.05892939e-01,\n",
" 1.05353673e+00],\n",
" [ -5.25060772e-02, -5.87763531e-01, 7.62758643e-01,\n",
" 1.57942861e+00],\n",
" [ 6.74501145e-01, 3.37848329e-01, 8.76490051e-01,\n",
" 1.44795564e+00],\n",
" [ 7.95669016e-01, -1.24957601e-01, 9.90221459e-01,\n",
" 7.90590793e-01],\n",
" [ 2.24968346e+00, 1.72626612e+00, 1.67260991e+00,\n",
" 1.31648267e+00],\n",
" [ 2.24968346e+00, -1.05056946e+00, 1.78634131e+00,\n",
" 1.44795564e+00],\n",
" [ 1.89829664e-01, -1.97618132e+00, 7.05892939e-01,\n",
" 3.96171883e-01],\n",
" [ 1.28034050e+00, 3.37848329e-01, 1.10395287e+00,\n",
" 1.44795564e+00],\n",
" [ -2.94841818e-01, -5.87763531e-01, 6.49027235e-01,\n",
" 1.05353673e+00],\n",
" [ 2.24968346e+00, -5.87763531e-01, 1.67260991e+00,\n",
" 1.05353673e+00],\n",
" [ 5.53333275e-01, -8.19166497e-01, 6.49027235e-01,\n",
" 7.90590793e-01],\n",
" [ 1.03800476e+00, 5.69251294e-01, 1.10395287e+00,\n",
" 1.18500970e+00],\n",
" [ 1.64384411e+00, 3.37848329e-01, 1.27454998e+00,\n",
" 7.90590793e-01],\n",
" [ 4.32165405e-01, -5.87763531e-01, 5.92161531e-01,\n",
" 7.90590793e-01],\n",
" [ 3.10997534e-01, -1.24957601e-01, 6.49027235e-01,\n",
" 7.90590793e-01],\n",
" [ 6.74501145e-01, -5.87763531e-01, 1.04708716e+00,\n",
" 1.18500970e+00],\n",
" [ 1.64384411e+00, -1.24957601e-01, 1.16081857e+00,\n",
" 5.27644853e-01],\n",
" [ 1.88617985e+00, -5.87763531e-01, 1.33141568e+00,\n",
" 9.22063763e-01],\n",
" [ 2.49201920e+00, 1.72626612e+00, 1.50201279e+00,\n",
" 1.05353673e+00],\n",
" [ 6.74501145e-01, -5.87763531e-01, 1.04708716e+00,\n",
" 1.31648267e+00],\n",
" [ 5.53333275e-01, -5.87763531e-01, 7.62758643e-01,\n",
" 3.96171883e-01],\n",
" [ 3.10997534e-01, -1.05056946e+00, 1.04708716e+00,\n",
" 2.64698913e-01],\n",
" [ 2.24968346e+00, -1.24957601e-01, 1.33141568e+00,\n",
" 1.44795564e+00],\n",
" [ 5.53333275e-01, 8.00654259e-01, 1.04708716e+00,\n",
" 1.57942861e+00],\n",
" [ 6.74501145e-01, 1.06445364e-01, 9.90221459e-01,\n",
" 7.90590793e-01],\n",
" [ 1.89829664e-01, -1.24957601e-01, 5.92161531e-01,\n",
" 7.90590793e-01],\n",
" [ 1.28034050e+00, 1.06445364e-01, 9.33355755e-01,\n",
" 1.18500970e+00],\n",
" [ 1.03800476e+00, 1.06445364e-01, 1.04708716e+00,\n",
" 1.57942861e+00],\n",
" [ 1.28034050e+00, 1.06445364e-01, 7.62758643e-01,\n",
" 1.44795564e+00],\n",
" [ -5.25060772e-02, -8.19166497e-01, 7.62758643e-01,\n",
" 9.22063763e-01],\n",
" [ 1.15917263e+00, 3.37848329e-01, 1.21768427e+00,\n",
" 1.44795564e+00],\n",
" [ 1.03800476e+00, 5.69251294e-01, 1.10395287e+00,\n",
" 1.71090158e+00],\n",
" [ 1.03800476e+00, -1.24957601e-01, 8.19624347e-01,\n",
" 1.44795564e+00],\n",
" [ 5.53333275e-01, -1.28197243e+00, 7.05892939e-01,\n",
" 9.22063763e-01],\n",
" [ 7.95669016e-01, -1.24957601e-01, 8.19624347e-01,\n",
" 1.05353673e+00],\n",
" [ 4.32165405e-01, 8.00654259e-01, 9.33355755e-01,\n",
" 1.44795564e+00],\n",
" [ 6.86617933e-02, -1.24957601e-01, 7.62758643e-01,\n",
" 7.90590793e-01]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
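"# Standardization: computes each feature's (column's) mean and standard deviation; returns the standardized data\n",
"# x' = (x - mean(x)) / S, where S is the standard deviation of the feature\n",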
"from sklearn.preprocessing import StandardScaler\n",
"StandardScaler().fit_transform(iris.data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.80377277, 0.55160877, 0.22064351, 0.0315205 ],\n",
" [ 0.82813287, 0.50702013, 0.23660939, 0.03380134],\n",
" [ 0.80533308, 0.54831188, 0.2227517 , 0.03426949],\n",
" [ 0.80003025, 0.53915082, 0.26087943, 0.03478392],\n",
" [ 0.790965 , 0.5694948 , 0.2214702 , 0.0316386 ],\n",
" [ 0.78417499, 0.5663486 , 0.2468699 , 0.05808704],\n",
" [ 0.78010936, 0.57660257, 0.23742459, 0.0508767 ],\n",
" [ 0.80218492, 0.54548574, 0.24065548, 0.0320874 ],\n",
" [ 0.80642366, 0.5315065 , 0.25658935, 0.03665562],\n",
" [ 0.81803119, 0.51752994, 0.25041771, 0.01669451],\n",
" [ 0.80373519, 0.55070744, 0.22325977, 0.02976797],\n",
" [ 0.786991 , 0.55745196, 0.26233033, 0.03279129],\n",
" [ 0.82307218, 0.51442011, 0.24006272, 0.01714734],\n",
" [ 0.8025126 , 0.55989251, 0.20529392, 0.01866308],\n",
" [ 0.81120865, 0.55945424, 0.16783627, 0.02797271],\n",
" [ 0.77381111, 0.59732787, 0.2036345 , 0.05430253],\n",
" [ 0.79428944, 0.57365349, 0.19121783, 0.05883625],\n",
" [ 0.80327412, 0.55126656, 0.22050662, 0.04725142],\n",
" [ 0.8068282 , 0.53788547, 0.24063297, 0.04246464],\n",
" [ 0.77964883, 0.58091482, 0.22930848, 0.0458617 ],\n",
" [ 0.8173379 , 0.51462016, 0.25731008, 0.03027177],\n",
" [ 0.78591858, 0.57017622, 0.23115252, 0.06164067],\n",
" [ 0.77577075, 0.60712493, 0.16864581, 0.03372916],\n",
" [ 0.80597792, 0.52151512, 0.26865931, 0.07901744],\n",
" [ 0.776114 , 0.54974742, 0.30721179, 0.03233808],\n",
" [ 0.82647451, 0.4958847 , 0.26447184, 0.03305898],\n",
" [ 0.79778206, 0.5424918 , 0.25529026, 0.06382256],\n",
" [ 0.80641965, 0.54278246, 0.23262105, 0.03101614],\n",
" [ 0.81609427, 0.5336001 , 0.21971769, 0.03138824],\n",
" [ 0.79524064, 0.54144043, 0.27072022, 0.03384003],\n",
" [ 0.80846584, 0.52213419, 0.26948861, 0.03368608],\n",
" [ 0.82225028, 0.51771314, 0.22840286, 0.06090743],\n",
" [ 0.76578311, 0.60379053, 0.22089897, 0.0147266 ],\n",
" [ 0.77867447, 0.59462414, 0.19820805, 0.02831544],\n",
" [ 0.81803119, 0.51752994, 0.25041771, 0.01669451],\n",
" [ 0.82512295, 0.52807869, 0.19802951, 0.03300492],\n",
" [ 0.82699754, 0.52627116, 0.19547215, 0.03007264],\n",
" [ 0.81803119, 0.51752994, 0.25041771, 0.01669451],\n",
" [ 0.80212413, 0.54690282, 0.23699122, 0.03646019],\n",
" [ 0.80779568, 0.53853046, 0.23758697, 0.03167826],\n",
" [ 0.80033301, 0.56023311, 0.20808658, 0.04801998],\n",
" [ 0.86093857, 0.44003527, 0.24871559, 0.0573959 ],\n",
" [ 0.78609038, 0.57170209, 0.23225397, 0.03573138],\n",
" [ 0.78889479, 0.55222635, 0.25244633, 0.09466737],\n",
" [ 0.76693897, 0.57144472, 0.28572236, 0.06015208],\n",
" [ 0.82210585, 0.51381615, 0.23978087, 0.05138162],\n",
" [ 0.77729093, 0.57915795, 0.24385598, 0.030482 ],\n",
" [ 0.79594782, 0.55370283, 0.24224499, 0.03460643],\n",
" [ 0.79837025, 0.55735281, 0.22595384, 0.03012718],\n",
" [ 0.81228363, 0.5361072 , 0.22743942, 0.03249135],\n",
" [ 0.76701103, 0.35063361, 0.51499312, 0.15340221],\n",
" [ 0.74549757, 0.37274878, 0.52417798, 0.17472599],\n",
" [ 0.75519285, 0.33928954, 0.53629637, 0.16417236],\n",
" [ 0.75384916, 0.31524601, 0.54825394, 0.17818253],\n",
" [ 0.7581754 , 0.32659863, 0.5365549 , 0.17496355],\n",
" [ 0.72232962, 0.35482858, 0.57026022, 0.16474184],\n",
" [ 0.72634846, 0.38046824, 0.54187901, 0.18446945],\n",
" [ 0.75916547, 0.37183615, 0.51127471, 0.15493173],\n",
" [ 0.76301853, 0.33526572, 0.53180079, 0.15029153],\n",
" [ 0.72460233, 0.37623583, 0.54345175, 0.19508524],\n",
" [ 0.76923077, 0.30769231, 0.53846154, 0.15384615],\n",
" [ 0.73923462, 0.37588201, 0.52623481, 0.187941 ],\n",
" [ 0.78892752, 0.28927343, 0.52595168, 0.13148792],\n",
" [ 0.73081412, 0.34743622, 0.56308629, 0.16772783],\n",
" [ 0.75911707, 0.3931142 , 0.48800383, 0.17622361],\n",
" [ 0.76945444, 0.35601624, 0.50531337, 0.16078153],\n",
" [ 0.70631892, 0.37838513, 0.5675777 , 0.18919257],\n",
" [ 0.75676497, 0.35228714, 0.53495455, 0.13047672],\n",
" [ 0.76444238, 0.27125375, 0.55483721, 0.18494574],\n",
" [ 0.76185188, 0.34011245, 0.53057542, 0.14964948],\n",
" [ 0.6985796 , 0.37889063, 0.56833595, 0.21312598],\n",
" [ 0.77011854, 0.35349703, 0.50499576, 0.16412362],\n",
" [ 0.74143307, 0.29421947, 0.57667016, 0.17653168],\n",
" [ 0.73659895, 0.33811099, 0.56754345, 0.14490471],\n",
" [ 0.76741698, 0.34773582, 0.51560829, 0.15588157],\n",
" [ 0.76785726, 0.34902603, 0.51190484, 0.16287881],\n",
" [ 0.76467269, 0.31486523, 0.53976896, 0.15743261],\n",
" [ 0.74088576, 0.33173989, 0.55289982, 0.18798594],\n",
" [ 0.73350949, 0.35452959, 0.55013212, 0.18337737],\n",
" [ 0.78667474, 0.35883409, 0.48304589, 0.13801311],\n",
" [ 0.76521855, 0.33391355, 0.52869645, 0.15304371],\n",
" [ 0.77242925, 0.33706004, 0.51963422, 0.14044168],\n",
" [ 0.76434981, 0.35581802, 0.51395936, 0.15814134],\n",
" [ 0.70779525, 0.31850786, 0.60162596, 0.1887454 ],\n",
" [ 0.69333409, 0.38518561, 0.57777841, 0.1925928 ],\n",
" [ 0.71524936, 0.40530797, 0.53643702, 0.19073316],\n",
" [ 0.75457341, 0.34913098, 0.52932761, 0.16893434],\n",
" [ 0.77530021, 0.28304611, 0.54147951, 0.15998258],\n",
" [ 0.72992443, 0.39103094, 0.53440896, 0.16944674],\n",
" [ 0.74714194, 0.33960997, 0.54337595, 0.17659719],\n",
" [ 0.72337118, 0.34195729, 0.57869695, 0.15782644],\n",
" [ 0.73260391, 0.36029701, 0.55245541, 0.1681386 ],\n",
" [ 0.76262994, 0.34186859, 0.52595168, 0.1577855 ],\n",
" [ 0.76986879, 0.35413965, 0.5081134 , 0.15397376],\n",
" [ 0.73544284, 0.35458851, 0.55158213, 0.1707278 ],\n",
" [ 0.73239618, 0.38547167, 0.53966034, 0.15418867],\n",
" [ 0.73446047, 0.37367287, 0.5411814 , 0.16750853],\n",
" [ 0.75728103, 0.3542121 , 0.52521104, 0.15878473],\n",
" [ 0.78258054, 0.38361791, 0.4603415 , 0.16879188],\n",
" [ 0.7431482 , 0.36505526, 0.5345452 , 0.16948994],\n",
" [ 0.65387747, 0.34250725, 0.62274045, 0.25947519],\n",
" [ 0.69052512, 0.32145135, 0.60718588, 0.22620651],\n",
" [ 0.71491405, 0.30207636, 0.59408351, 0.21145345],\n",
" [ 0.69276796, 0.31889319, 0.61579374, 0.1979337 ],\n",
" [ 0.68619022, 0.31670318, 0.61229281, 0.232249 ],\n",
" [ 0.70953708, 0.28008043, 0.61617694, 0.1960563 ],\n",
" [ 0.67054118, 0.34211284, 0.61580312, 0.23263673],\n",
" [ 0.71366557, 0.28351098, 0.61590317, 0.17597233],\n",
" [ 0.71414125, 0.26647062, 0.61821183, 0.19185884],\n",
" [ 0.69198788, 0.34599394, 0.58626751, 0.24027357],\n",
" [ 0.71562645, 0.3523084 , 0.56149152, 0.22019275],\n",
" [ 0.71576546, 0.30196356, 0.59274328, 0.21249287],\n",
" [ 0.71718148, 0.31640359, 0.58007326, 0.22148252],\n",
" [ 0.6925518 , 0.30375079, 0.60750157, 0.24300063],\n",
" [ 0.67767924, 0.32715549, 0.59589036, 0.28041899],\n",
" [ 0.69589887, 0.34794944, 0.57629125, 0.25008866],\n",
" [ 0.70610474, 0.3258945 , 0.59747324, 0.1955367 ],\n",
" [ 0.69299099, 0.34199555, 0.60299216, 0.19799743],\n",
" [ 0.70600618, 0.2383917 , 0.63265489, 0.21088496],\n",
" [ 0.72712585, 0.26661281, 0.60593821, 0.18178146],\n",
" [ 0.70558934, 0.32722984, 0.58287815, 0.23519645],\n",
" [ 0.68307923, 0.34153961, 0.59769433, 0.24395687],\n",
" [ 0.71486543, 0.25995106, 0.62202576, 0.18567933],\n",
" [ 0.73122464, 0.31338199, 0.56873028, 0.20892133],\n",
" [ 0.69595601, 0.3427843 , 0.59208198, 0.21813547],\n",
" [ 0.71529453, 0.31790868, 0.59607878, 0.17882363],\n",
" [ 0.72785195, 0.32870733, 0.56349829, 0.21131186],\n",
" [ 0.71171214, 0.35002236, 0.57170319, 0.21001342],\n",
" [ 0.69594002, 0.30447376, 0.60894751, 0.22835532],\n",
" [ 0.73089855, 0.30454106, 0.58877939, 0.1624219 ],\n",
" [ 0.72766159, 0.27533141, 0.59982915, 0.18683203],\n",
" [ 0.71578999, 0.34430405, 0.5798805 , 0.18121266],\n",
" [ 0.69417747, 0.30370264, 0.60740528, 0.2386235 ],\n",
" [ 0.72366005, 0.32162669, 0.58582004, 0.17230001],\n",
" [ 0.69385414, 0.29574111, 0.63698085, 0.15924521],\n",
" [ 0.73154399, 0.28501714, 0.57953485, 0.21851314],\n",
" [ 0.67017484, 0.36168166, 0.59571097, 0.2553047 ],\n",
" [ 0.69804799, 0.338117 , 0.59988499, 0.196326 ],\n",
" [ 0.71066905, 0.35533453, 0.56853524, 0.21320072],\n",
" [ 0.72415258, 0.32534391, 0.56672811, 0.22039426],\n",
" [ 0.69997037, 0.32386689, 0.58504986, 0.25073566],\n",
" [ 0.73337886, 0.32948905, 0.54206264, 0.24445962],\n",
" [ 0.69052512, 0.32145135, 0.60718588, 0.22620651],\n",
" [ 0.69193502, 0.32561648, 0.60035539, 0.23403685],\n",
" [ 0.68914871, 0.33943145, 0.58629069, 0.25714504],\n",
" [ 0.72155725, 0.32308533, 0.56001458, 0.24769876],\n",
" [ 0.72965359, 0.28954508, 0.57909015, 0.22005426],\n",
" [ 0.71653899, 0.3307103 , 0.57323119, 0.22047353],\n",
" [ 0.67467072, 0.36998072, 0.58761643, 0.25028107],\n",
" [ 0.69025916, 0.35097923, 0.5966647 , 0.21058754]])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Normalization: rescales each sample (row) to unit L2 norm\n",
"from sklearn.preprocessing import Normalizer\n",
"Normalizer().fit_transform(iris.data)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.22222222, 0.625 , 0.06779661, 0.04166667],\n",
" [ 0.16666667, 0.41666667, 0.06779661, 0.04166667],\n",
" [ 0.11111111, 0.5 , 0.05084746, 0.04166667],\n",
" [ 0.08333333, 0.45833333, 0.08474576, 0.04166667],\n",
" [ 0.19444444, 0.66666667, 0.06779661, 0.04166667],\n",
" [ 0.30555556, 0.79166667, 0.11864407, 0.125 ],\n",
" [ 0.08333333, 0.58333333, 0.06779661, 0.08333333],\n",
" [ 0.19444444, 0.58333333, 0.08474576, 0.04166667],\n",
" [ 0.02777778, 0.375 , 0.06779661, 0.04166667],\n",
" [ 0.16666667, 0.45833333, 0.08474576, 0. ],\n",
" [ 0.30555556, 0.70833333, 0.08474576, 0.04166667],\n",
" [ 0.13888889, 0.58333333, 0.10169492, 0.04166667],\n",
" [ 0.13888889, 0.41666667, 0.06779661, 0. ],\n",
" [ 0. , 0.41666667, 0.01694915, 0. ],\n",
" [ 0.41666667, 0.83333333, 0.03389831, 0.04166667],\n",
" [ 0.38888889, 1. , 0.08474576, 0.125 ],\n",
" [ 0.30555556, 0.79166667, 0.05084746, 0.125 ],\n",
" [ 0.22222222, 0.625 , 0.06779661, 0.08333333],\n",
" [ 0.38888889, 0.75 , 0.11864407, 0.08333333],\n",
" [ 0.22222222, 0.75 , 0.08474576, 0.08333333],\n",
" [ 0.30555556, 0.58333333, 0.11864407, 0.04166667],\n",
" [ 0.22222222, 0.70833333, 0.08474576, 0.125 ],\n",
" [ 0.08333333, 0.66666667, 0. , 0.04166667],\n",
" [ 0.22222222, 0.54166667, 0.11864407, 0.16666667],\n",
" [ 0.13888889, 0.58333333, 0.15254237, 0.04166667],\n",
" [ 0.19444444, 0.41666667, 0.10169492, 0.04166667],\n",
" [ 0.19444444, 0.58333333, 0.10169492, 0.125 ],\n",
" [ 0.25 , 0.625 , 0.08474576, 0.04166667],\n",
" [ 0.25 , 0.58333333, 0.06779661, 0.04166667],\n",
" [ 0.11111111, 0.5 , 0.10169492, 0.04166667],\n",
" [ 0.13888889, 0.45833333, 0.10169492, 0.04166667],\n",
" [ 0.30555556, 0.58333333, 0.08474576, 0.125 ],\n",
" [ 0.25 , 0.875 , 0.08474576, 0. ],\n",
" [ 0.33333333, 0.91666667, 0.06779661, 0.04166667],\n",
" [ 0.16666667, 0.45833333, 0.08474576, 0. ],\n",
" [ 0.19444444, 0.5 , 0.03389831, 0.04166667],\n",
" [ 0.33333333, 0.625 , 0.05084746, 0.04166667],\n",
" [ 0.16666667, 0.45833333, 0.08474576, 0. ],\n",
" [ 0.02777778, 0.41666667, 0.05084746, 0.04166667],\n",
" [ 0.22222222, 0.58333333, 0.08474576, 0.04166667],\n",
" [ 0.19444444, 0.625 , 0.05084746, 0.08333333],\n",
" [ 0.05555556, 0.125 , 0.05084746, 0.08333333],\n",
" [ 0.02777778, 0.5 , 0.05084746, 0.04166667],\n",
" [ 0.19444444, 0.625 , 0.10169492, 0.20833333],\n",
" [ 0.22222222, 0.75 , 0.15254237, 0.125 ],\n",
" [ 0.13888889, 0.41666667, 0.06779661, 0.08333333],\n",
" [ 0.22222222, 0.75 , 0.10169492, 0.04166667],\n",
" [ 0.08333333, 0.5 , 0.06779661, 0.04166667],\n",
" [ 0.27777778, 0.70833333, 0.08474576, 0.04166667],\n",
" [ 0.19444444, 0.54166667, 0.06779661, 0.04166667],\n",
" [ 0.75 , 0.5 , 0.62711864, 0.54166667],\n",
" [ 0.58333333, 0.5 , 0.59322034, 0.58333333],\n",
" [ 0.72222222, 0.45833333, 0.66101695, 0.58333333],\n",
" [ 0.33333333, 0.125 , 0.50847458, 0.5 ],\n",
" [ 0.61111111, 0.33333333, 0.61016949, 0.58333333],\n",
" [ 0.38888889, 0.33333333, 0.59322034, 0.5 ],\n",
" [ 0.55555556, 0.54166667, 0.62711864, 0.625 ],\n",
" [ 0.16666667, 0.16666667, 0.38983051, 0.375 ],\n",
" [ 0.63888889, 0.375 , 0.61016949, 0.5 ],\n",
" [ 0.25 , 0.29166667, 0.49152542, 0.54166667],\n",
" [ 0.19444444, 0. , 0.42372881, 0.375 ],\n",
" [ 0.44444444, 0.41666667, 0.54237288, 0.58333333],\n",
" [ 0.47222222, 0.08333333, 0.50847458, 0.375 ],\n",
" [ 0.5 , 0.375 , 0.62711864, 0.54166667],\n",
" [ 0.36111111, 0.375 , 0.44067797, 0.5 ],\n",
" [ 0.66666667, 0.45833333, 0.57627119, 0.54166667],\n",
" [ 0.36111111, 0.41666667, 0.59322034, 0.58333333],\n",
" [ 0.41666667, 0.29166667, 0.52542373, 0.375 ],\n",
" [ 0.52777778, 0.08333333, 0.59322034, 0.58333333],\n",
" [ 0.36111111, 0.20833333, 0.49152542, 0.41666667],\n",
" [ 0.44444444, 0.5 , 0.6440678 , 0.70833333],\n",
" [ 0.5 , 0.33333333, 0.50847458, 0.5 ],\n",
" [ 0.55555556, 0.20833333, 0.66101695, 0.58333333],\n",
" [ 0.5 , 0.33333333, 0.62711864, 0.45833333],\n",
" [ 0.58333333, 0.375 , 0.55932203, 0.5 ],\n",
" [ 0.63888889, 0.41666667, 0.57627119, 0.54166667],\n",
" [ 0.69444444, 0.33333333, 0.6440678 , 0.54166667],\n",
" [ 0.66666667, 0.41666667, 0.6779661 , 0.66666667],\n",
" [ 0.47222222, 0.375 , 0.59322034, 0.58333333],\n",
" [ 0.38888889, 0.25 , 0.42372881, 0.375 ],\n",
" [ 0.33333333, 0.16666667, 0.47457627, 0.41666667],\n",
" [ 0.33333333, 0.16666667, 0.45762712, 0.375 ],\n",
" [ 0.41666667, 0.29166667, 0.49152542, 0.45833333],\n",
" [ 0.47222222, 0.29166667, 0.69491525, 0.625 ],\n",
" [ 0.30555556, 0.41666667, 0.59322034, 0.58333333],\n",
" [ 0.47222222, 0.58333333, 0.59322034, 0.625 ],\n",
" [ 0.66666667, 0.45833333, 0.62711864, 0.58333333],\n",
" [ 0.55555556, 0.125 , 0.57627119, 0.5 ],\n",
" [ 0.36111111, 0.41666667, 0.52542373, 0.5 ],\n",
" [ 0.33333333, 0.20833333, 0.50847458, 0.5 ],\n",
" [ 0.33333333, 0.25 , 0.57627119, 0.45833333],\n",
" [ 0.5 , 0.41666667, 0.61016949, 0.54166667],\n",
" [ 0.41666667, 0.25 , 0.50847458, 0.45833333],\n",
" [ 0.19444444, 0.125 , 0.38983051, 0.375 ],\n",
" [ 0.36111111, 0.29166667, 0.54237288, 0.5 ],\n",
" [ 0.38888889, 0.41666667, 0.54237288, 0.45833333],\n",
" [ 0.38888889, 0.375 , 0.54237288, 0.5 ],\n",
" [ 0.52777778, 0.375 , 0.55932203, 0.5 ],\n",
" [ 0.22222222, 0.20833333, 0.33898305, 0.41666667],\n",
" [ 0.38888889, 0.33333333, 0.52542373, 0.5 ],\n",
" [ 0.55555556, 0.54166667, 0.84745763, 1. ],\n",
" [ 0.41666667, 0.29166667, 0.69491525, 0.75 ],\n",
" [ 0.77777778, 0.41666667, 0.83050847, 0.83333333],\n",
" [ 0.55555556, 0.375 , 0.77966102, 0.70833333],\n",
" [ 0.61111111, 0.41666667, 0.81355932, 0.875 ],\n",
" [ 0.91666667, 0.41666667, 0.94915254, 0.83333333],\n",
" [ 0.16666667, 0.20833333, 0.59322034, 0.66666667],\n",
" [ 0.83333333, 0.375 , 0.89830508, 0.70833333],\n",
" [ 0.66666667, 0.20833333, 0.81355932, 0.70833333],\n",
" [ 0.80555556, 0.66666667, 0.86440678, 1. ],\n",
" [ 0.61111111, 0.5 , 0.69491525, 0.79166667],\n",
" [ 0.58333333, 0.29166667, 0.72881356, 0.75 ],\n",
" [ 0.69444444, 0.41666667, 0.76271186, 0.83333333],\n",
" [ 0.38888889, 0.20833333, 0.6779661 , 0.79166667],\n",
" [ 0.41666667, 0.33333333, 0.69491525, 0.95833333],\n",
" [ 0.58333333, 0.5 , 0.72881356, 0.91666667],\n",
" [ 0.61111111, 0.41666667, 0.76271186, 0.70833333],\n",
" [ 0.94444444, 0.75 , 0.96610169, 0.875 ],\n",
" [ 0.94444444, 0.25 , 1. , 0.91666667],\n",
" [ 0.47222222, 0.08333333, 0.6779661 , 0.58333333],\n",
" [ 0.72222222, 0.5 , 0.79661017, 0.91666667],\n",
" [ 0.36111111, 0.33333333, 0.66101695, 0.79166667],\n",
" [ 0.94444444, 0.33333333, 0.96610169, 0.79166667],\n",
" [ 0.55555556, 0.29166667, 0.66101695, 0.70833333],\n",
" [ 0.66666667, 0.54166667, 0.79661017, 0.83333333],\n",
" [ 0.80555556, 0.5 , 0.84745763, 0.70833333],\n",
" [ 0.52777778, 0.33333333, 0.6440678 , 0.70833333],\n",
" [ 0.5 , 0.41666667, 0.66101695, 0.70833333],\n",
" [ 0.58333333, 0.33333333, 0.77966102, 0.83333333],\n",
" [ 0.80555556, 0.41666667, 0.81355932, 0.625 ],\n",
" [ 0.86111111, 0.33333333, 0.86440678, 0.75 ],\n",
" [ 1. , 0.75 , 0.91525424, 0.79166667],\n",
" [ 0.58333333, 0.33333333, 0.77966102, 0.875 ],\n",
" [ 0.55555556, 0.33333333, 0.69491525, 0.58333333],\n",
" [ 0.5 , 0.25 , 0.77966102, 0.54166667],\n",
" [ 0.94444444, 0.41666667, 0.86440678, 0.91666667],\n",
" [ 0.55555556, 0.58333333, 0.77966102, 0.95833333],\n",
" [ 0.58333333, 0.45833333, 0.76271186, 0.70833333],\n",
" [ 0.47222222, 0.41666667, 0.6440678 , 0.70833333],\n",
" [ 0.72222222, 0.45833333, 0.74576271, 0.83333333],\n",
" [ 0.66666667, 0.45833333, 0.77966102, 0.95833333],\n",
" [ 0.72222222, 0.45833333, 0.69491525, 0.91666667],\n",
" [ 0.41666667, 0.29166667, 0.69491525, 0.75 ],\n",
" [ 0.69444444, 0.5 , 0.83050847, 0.91666667],\n",
" [ 0.66666667, 0.54166667, 0.79661017, 1. ],\n",
" [ 0.66666667, 0.41666667, 0.71186441, 0.91666667],\n",
" [ 0.55555556, 0.20833333, 0.6779661 , 0.75 ],\n",
" [ 0.61111111, 0.41666667, 0.71186441, 0.79166667],\n",
" [ 0.52777778, 0.58333333, 0.74576271, 0.91666667],\n",
" [ 0.44444444, 0.41666667, 0.69491525, 0.70833333]])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 区间缩放法,最常见的是最值缩放\n",
"# x' = (x- Min)/(Max - Min)\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"MinMaxScaler().fit_transform(iris.data)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 标准化与归一化的区别\n",
"标准化是依照特征矩阵的列处理数据,其通过求z-score的方法,将样本的特征值转换到同一量纲下。\n",
"\n",
"归一化是依照特征矩阵的行处理数据,其目的在于样本向量在点乘运算或其他核函数计算相似性时,拥有统一的标准,也就是说都转化为“单位向量。"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 0., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 0., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 0., 1., 0.],\n",
" [ 1., 1., 1., 0.],\n",
" [ 1., 0., 1., 0.]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 二値化\n",
"from sklearn.preprocessing import Binarizer\n",
"Binarizer(threshold=3).fit_transform(iris.data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" (0, 0)\t1.0\n",
" (1, 0)\t1.0\n",
" (2, 0)\t1.0\n",
" (3, 0)\t1.0\n",
" (4, 0)\t1.0\n",
" (5, 0)\t1.0\n",
" (6, 0)\t1.0\n",
" (7, 0)\t1.0\n",
" (8, 0)\t1.0\n",
" (9, 0)\t1.0\n",
" (10, 0)\t1.0\n",
" (11, 0)\t1.0\n",
" (12, 0)\t1.0\n",
" (13, 0)\t1.0\n",
" (14, 0)\t1.0\n",
" (15, 0)\t1.0\n",
" (16, 0)\t1.0\n",
" (17, 0)\t1.0\n",
" (18, 0)\t1.0\n",
" (19, 0)\t1.0\n",
" (20, 0)\t1.0\n",
" (21, 0)\t1.0\n",
" (22, 0)\t1.0\n",
" (23, 0)\t1.0\n",
" (24, 0)\t1.0\n",
" :\t:\n",
" (125, 2)\t1.0\n",
" (126, 2)\t1.0\n",
" (127, 2)\t1.0\n",
" (128, 2)\t1.0\n",
" (129, 2)\t1.0\n",
" (130, 2)\t1.0\n",
" (131, 2)\t1.0\n",
" (132, 2)\t1.0\n",
" (133, 2)\t1.0\n",
" (134, 2)\t1.0\n",
" (135, 2)\t1.0\n",
" (136, 2)\t1.0\n",
" (137, 2)\t1.0\n",
" (138, 2)\t1.0\n",
" (139, 2)\t1.0\n",
" (140, 2)\t1.0\n",
" (141, 2)\t1.0\n",
" (142, 2)\t1.0\n",
" (143, 2)\t1.0\n",
" (144, 2)\t1.0\n",
" (145, 2)\t1.0\n",
" (146, 2)\t1.0\n",
" (147, 2)\t1.0\n",
" (148, 2)\t1.0\n",
" (149, 2)\t1.0\n"
]
}
],
"source": [
"# 定性特征One-Hot\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"print(OneHotEncoder().fit_transform(iris.target.reshape((-1,1))))\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([], shape=(4, 0), dtype=float64)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 缺失值计算\n",
"# 返回值为计算缺失值后的数据\n",
"\n",
"from sklearn.preprocessing import Imputer\n",
"Imputer().fit_transform(np.vstack(np.asarray([np.nan for i in range(4)])), iris.data)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1. , 5.1 , 3.5 , ..., 1.96, 0.28, 0.04],\n",
" [ 1. , 4.9 , 3. , ..., 1.96, 0.28, 0.04],\n",
" [ 1. , 4.7 , 3.2 , ..., 1.69, 0.26, 0.04],\n",
" ..., \n",
" [ 1. , 6.5 , 3. , ..., 27.04, 10.4 , 4. ],\n",
" [ 1. , 6.2 , 3.4 , ..., 29.16, 12.42, 5.29],\n",
" [ 1. , 5.9 , 3. , ..., 26.01, 9.18, 3.24]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 数据变换\n",
"# 有基于多项式的,基于指数的,基于对数函数的\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"PolynomialFeatures().fit_transform(iris.data)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.80828877, 1.5040774 , 0.87546874, 0.18232156],\n",
" [ 1.77495235, 1.38629436, 0.87546874, 0.18232156],\n",
" [ 1.74046617, 1.43508453, 0.83290912, 0.18232156],\n",
" [ 1.7227666 , 1.41098697, 0.91629073, 0.18232156],\n",
" [ 1.79175947, 1.5260563 , 0.87546874, 0.18232156],\n",
" [ 1.85629799, 1.58923521, 0.99325177, 0.33647224],\n",
" [ 1.7227666 , 1.48160454, 0.87546874, 0.26236426],\n",
" [ 1.79175947, 1.48160454, 0.91629073, 0.18232156],\n",
" [ 1.68639895, 1.36097655, 0.87546874, 0.18232156],\n",
" [ 1.77495235, 1.41098697, 0.91629073, 0.09531018],\n",
" [ 1.85629799, 1.54756251, 0.91629073, 0.18232156],\n",
" [ 1.75785792, 1.48160454, 0.95551145, 0.18232156],\n",
" [ 1.75785792, 1.38629436, 0.87546874, 0.09531018],\n",
" [ 1.66770682, 1.38629436, 0.74193734, 0.09531018],\n",
" [ 1.91692261, 1.60943791, 0.78845736, 0.18232156],\n",
" [ 1.90210753, 1.68639895, 0.91629073, 0.33647224],\n",
" [ 1.85629799, 1.58923521, 0.83290912, 0.33647224],\n",
" [ 1.80828877, 1.5040774 , 0.87546874, 0.26236426],\n",
" [ 1.90210753, 1.56861592, 0.99325177, 0.26236426],\n",
" [ 1.80828877, 1.56861592, 0.91629073, 0.26236426],\n",
" [ 1.85629799, 1.48160454, 0.99325177, 0.18232156],\n",
" [ 1.80828877, 1.54756251, 0.91629073, 0.33647224],\n",
" [ 1.7227666 , 1.5260563 , 0.69314718, 0.18232156],\n",
" [ 1.80828877, 1.45861502, 0.99325177, 0.40546511],\n",
" [ 1.75785792, 1.48160454, 1.06471074, 0.18232156],\n",
" [ 1.79175947, 1.38629436, 0.95551145, 0.18232156],\n",
" [ 1.79175947, 1.48160454, 0.95551145, 0.33647224],\n",
" [ 1.82454929, 1.5040774 , 0.91629073, 0.18232156],\n",
" [ 1.82454929, 1.48160454, 0.87546874, 0.18232156],\n",
" [ 1.74046617, 1.43508453, 0.95551145, 0.18232156],\n",
" [ 1.75785792, 1.41098697, 0.95551145, 0.18232156],\n",
" [ 1.85629799, 1.48160454, 0.91629073, 0.33647224],\n",
" [ 1.82454929, 1.62924054, 0.91629073, 0.09531018],\n",
" [ 1.87180218, 1.64865863, 0.87546874, 0.18232156],\n",
" [ 1.77495235, 1.41098697, 0.91629073, 0.09531018],\n",
" [ 1.79175947, 1.43508453, 0.78845736, 0.18232156],\n",
" [ 1.87180218, 1.5040774 , 0.83290912, 0.18232156],\n",
" [ 1.77495235, 1.41098697, 0.91629073, 0.09531018],\n",
" [ 1.68639895, 1.38629436, 0.83290912, 0.18232156],\n",
" [ 1.80828877, 1.48160454, 0.91629073, 0.18232156],\n",
" [ 1.79175947, 1.5040774 , 0.83290912, 0.26236426],\n",
" [ 1.70474809, 1.19392247, 0.83290912, 0.26236426],\n",
" [ 1.68639895, 1.43508453, 0.83290912, 0.18232156],\n",
" [ 1.79175947, 1.5040774 , 0.95551145, 0.47000363],\n",
" [ 1.80828877, 1.56861592, 1.06471074, 0.33647224],\n",
" [ 1.75785792, 1.38629436, 0.87546874, 0.26236426],\n",
" [ 1.80828877, 1.56861592, 0.95551145, 0.18232156],\n",
" [ 1.7227666 , 1.43508453, 0.87546874, 0.18232156],\n",
" [ 1.84054963, 1.54756251, 0.91629073, 0.18232156],\n",
" [ 1.79175947, 1.45861502, 0.87546874, 0.18232156],\n",
" [ 2.07944154, 1.43508453, 1.74046617, 0.87546874],\n",
" [ 2.00148 , 1.43508453, 1.70474809, 0.91629073],\n",
" [ 2.06686276, 1.41098697, 1.77495235, 0.91629073],\n",
" [ 1.87180218, 1.19392247, 1.60943791, 0.83290912],\n",
" [ 2.01490302, 1.33500107, 1.7227666 , 0.91629073],\n",
" [ 1.90210753, 1.33500107, 1.70474809, 0.83290912],\n",
" [ 1.98787435, 1.45861502, 1.74046617, 0.95551145],\n",
" [ 1.77495235, 1.22377543, 1.45861502, 0.69314718],\n",
" [ 2.02814825, 1.36097655, 1.7227666 , 0.83290912],\n",
" [ 1.82454929, 1.30833282, 1.58923521, 0.87546874],\n",
" [ 1.79175947, 1.09861229, 1.5040774 , 0.69314718],\n",
" [ 1.93152141, 1.38629436, 1.64865863, 0.91629073],\n",
" [ 1.94591015, 1.16315081, 1.60943791, 0.69314718],\n",
" [ 1.96009478, 1.36097655, 1.74046617, 0.87546874],\n",
" [ 1.88706965, 1.36097655, 1.5260563 , 0.83290912],\n",
" [ 2.04122033, 1.41098697, 1.68639895, 0.87546874],\n",
" [ 1.88706965, 1.38629436, 1.70474809, 0.91629073],\n",
" [ 1.91692261, 1.30833282, 1.62924054, 0.69314718],\n",
" [ 1.97408103, 1.16315081, 1.70474809, 0.91629073],\n",
" [ 1.88706965, 1.25276297, 1.58923521, 0.74193734],\n",
" [ 1.93152141, 1.43508453, 1.75785792, 1.02961942],\n",
" [ 1.96009478, 1.33500107, 1.60943791, 0.83290912],\n",
" [ 1.98787435, 1.25276297, 1.77495235, 0.91629073],\n",
" [ 1.96009478, 1.33500107, 1.74046617, 0.78845736],\n",
" [ 2.00148 , 1.36097655, 1.66770682, 0.83290912],\n",
" [ 2.02814825, 1.38629436, 1.68639895, 0.87546874],\n",
" [ 2.05412373, 1.33500107, 1.75785792, 0.87546874],\n",
" [ 2.04122033, 1.38629436, 1.79175947, 0.99325177],\n",
" [ 1.94591015, 1.36097655, 1.70474809, 0.91629073],\n",
" [ 1.90210753, 1.28093385, 1.5040774 , 0.69314718],\n",
" [ 1.87180218, 1.22377543, 1.56861592, 0.74193734],\n",
" [ 1.87180218, 1.22377543, 1.54756251, 0.69314718],\n",
" [ 1.91692261, 1.30833282, 1.58923521, 0.78845736],\n",
" [ 1.94591015, 1.30833282, 1.80828877, 0.95551145],\n",
" [ 1.85629799, 1.38629436, 1.70474809, 0.91629073],\n",
" [ 1.94591015, 1.48160454, 1.70474809, 0.95551145],\n",
" [ 2.04122033, 1.41098697, 1.74046617, 0.91629073],\n",
" [ 1.98787435, 1.19392247, 1.68639895, 0.83290912],\n",
" [ 1.88706965, 1.38629436, 1.62924054, 0.83290912],\n",
" [ 1.87180218, 1.25276297, 1.60943791, 0.83290912],\n",
" [ 1.87180218, 1.28093385, 1.68639895, 0.78845736],\n",
" [ 1.96009478, 1.38629436, 1.7227666 , 0.87546874],\n",
" [ 1.91692261, 1.28093385, 1.60943791, 0.78845736],\n",
" [ 1.79175947, 1.19392247, 1.45861502, 0.69314718],\n",
" [ 1.88706965, 1.30833282, 1.64865863, 0.83290912],\n",
" [ 1.90210753, 1.38629436, 1.64865863, 0.78845736],\n",
" [ 1.90210753, 1.36097655, 1.64865863, 0.83290912],\n",
" [ 1.97408103, 1.36097655, 1.66770682, 0.83290912],\n",
" [ 1.80828877, 1.25276297, 1.38629436, 0.74193734],\n",
" [ 1.90210753, 1.33500107, 1.62924054, 0.83290912],\n",
" [ 1.98787435, 1.45861502, 1.94591015, 1.25276297],\n",
" [ 1.91692261, 1.30833282, 1.80828877, 1.06471074],\n",
" [ 2.09186406, 1.38629436, 1.93152141, 1.13140211],\n",
" [ 1.98787435, 1.36097655, 1.88706965, 1.02961942],\n",
" [ 2.01490302, 1.38629436, 1.91692261, 1.16315081],\n",
" [ 2.1517622 , 1.38629436, 2.02814825, 1.13140211],\n",
" [ 1.77495235, 1.25276297, 1.70474809, 0.99325177],\n",
" [ 2.11625551, 1.36097655, 1.98787435, 1.02961942],\n",
" [ 2.04122033, 1.25276297, 1.91692261, 1.02961942],\n",
" [ 2.10413415, 1.5260563 , 1.96009478, 1.25276297],\n",
" [ 2.01490302, 1.43508453, 1.80828877, 1.09861229],\n",
" [ 2.00148 , 1.30833282, 1.84054963, 1.06471074],\n",
" [ 2.05412373, 1.38629436, 1.87180218, 1.13140211],\n",
" [ 1.90210753, 1.25276297, 1.79175947, 1.09861229],\n",
" [ 1.91692261, 1.33500107, 1.80828877, 1.22377543],\n",
" [ 2.00148 , 1.43508453, 1.84054963, 1.19392247],\n",
" [ 2.01490302, 1.38629436, 1.87180218, 1.02961942],\n",
" [ 2.16332303, 1.56861592, 2.04122033, 1.16315081],\n",
" [ 2.16332303, 1.28093385, 2.06686276, 1.19392247],\n",
" [ 1.94591015, 1.16315081, 1.79175947, 0.91629073],\n",
" [ 2.06686276, 1.43508453, 1.90210753, 1.19392247],\n",
" [ 1.88706965, 1.33500107, 1.77495235, 1.09861229],\n",
" [ 2.16332303, 1.33500107, 2.04122033, 1.09861229],\n",
" [ 1.98787435, 1.30833282, 1.77495235, 1.02961942],\n",
" [ 2.04122033, 1.45861502, 1.90210753, 1.13140211],\n",
" [ 2.10413415, 1.43508453, 1.94591015, 1.02961942],\n",
" [ 1.97408103, 1.33500107, 1.75785792, 1.02961942],\n",
" [ 1.96009478, 1.38629436, 1.77495235, 1.02961942],\n",
" [ 2.00148 , 1.33500107, 1.88706965, 1.13140211],\n",
" [ 2.10413415, 1.38629436, 1.91692261, 0.95551145],\n",
" [ 2.12823171, 1.33500107, 1.96009478, 1.06471074],\n",
" [ 2.18605128, 1.56861592, 2.00148 , 1.09861229],\n",
" [ 2.00148 , 1.33500107, 1.88706965, 1.16315081],\n",
" [ 1.98787435, 1.33500107, 1.80828877, 0.91629073],\n",
" [ 1.96009478, 1.28093385, 1.88706965, 0.87546874],\n",
" [ 2.16332303, 1.38629436, 1.96009478, 1.19392247],\n",
" [ 1.98787435, 1.48160454, 1.88706965, 1.22377543],\n",
" [ 2.00148 , 1.41098697, 1.87180218, 1.02961942],\n",
" [ 1.94591015, 1.38629436, 1.75785792, 1.02961942],\n",
" [ 2.06686276, 1.41098697, 1.85629799, 1.13140211],\n",
" [ 2.04122033, 1.41098697, 1.88706965, 1.22377543],\n",
" [ 2.06686276, 1.41098697, 1.80828877, 1.19392247],\n",
" [ 1.91692261, 1.30833282, 1.80828877, 1.06471074],\n",
" [ 2.05412373, 1.43508453, 1.93152141, 1.19392247],\n",
" [ 2.04122033, 1.45861502, 1.90210753, 1.25276297],\n",
" [ 2.04122033, 1.38629436, 1.82454929, 1.19392247],\n",
" [ 1.98787435, 1.25276297, 1.79175947, 1.06471074],\n",
" [ 2.01490302, 1.38629436, 1.82454929, 1.09861229],\n",
" [ 1.97408103, 1.48160454, 1.85629799, 1.19392247],\n",
" [ 1.93152141, 1.38629436, 1.80828877, 1.02961942]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import FunctionTransformer\n",
"FunctionTransformer(np.log1p).fit_transform(iris.data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 特征选择\n",
"\n",
"* 特征是否发散:\n",
"\n",
"如果一个特征不发散,例如方差接近于0,也就是说样本在这个特征上基本上没有差异,这个特征对于样本的区分并没有什么用。\n",
"\n",
"* 特征与目标的相关性:\n",
"\n",
"这点比较显见,与目标相关性高的特征,应当优选选择。除方差法外,本文介绍的其他方法均从相关性考虑。\n",
"\n",
"根据特征选择的形式又可以将特征选择方法分为3种:\n",
"* Filter:过滤法,\n",
"按照发散性或者相关性对各个特征进行评分,设定阈值或者待选择阈值的个数,选择特征。\n",
"* Wrapper:包装法,\n",
"根据目标函数(通常是预测效果评分),每次选择若干特征,或者排除若干特征。\n",
"* Embedded:嵌入法,\n",
"先使用某些机器学习的算法和模型进行训练,得到各个特征的权值系数,根据系数从大到小选择特征。类似于Filter方法,但是是通过训练来确定特征的优劣。\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Filter"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.4],\n",
" [ 1.4],\n",
" [ 1.3],\n",
" [ 1.5],\n",
" [ 1.4],\n",
" [ 1.7],\n",
" [ 1.4],\n",
" [ 1.5],\n",
" [ 1.4],\n",
" [ 1.5],\n",
" [ 1.5],\n",
" [ 1.6],\n",
" [ 1.4],\n",
" [ 1.1],\n",
" [ 1.2],\n",
" [ 1.5],\n",
" [ 1.3],\n",
" [ 1.4],\n",
" [ 1.7],\n",
" [ 1.5],\n",
" [ 1.7],\n",
" [ 1.5],\n",
" [ 1. ],\n",
" [ 1.7],\n",
" [ 1.9],\n",
" [ 1.6],\n",
" [ 1.6],\n",
" [ 1.5],\n",
" [ 1.4],\n",
" [ 1.6],\n",
" [ 1.6],\n",
" [ 1.5],\n",
" [ 1.5],\n",
" [ 1.4],\n",
" [ 1.5],\n",
" [ 1.2],\n",
" [ 1.3],\n",
" [ 1.5],\n",
" [ 1.3],\n",
" [ 1.5],\n",
" [ 1.3],\n",
" [ 1.3],\n",
" [ 1.3],\n",
" [ 1.6],\n",
" [ 1.9],\n",
" [ 1.4],\n",
" [ 1.6],\n",
" [ 1.4],\n",
" [ 1.5],\n",
" [ 1.4],\n",
" [ 4.7],\n",
" [ 4.5],\n",
" [ 4.9],\n",
" [ 4. ],\n",
" [ 4.6],\n",
" [ 4.5],\n",
" [ 4.7],\n",
" [ 3.3],\n",
" [ 4.6],\n",
" [ 3.9],\n",
" [ 3.5],\n",
" [ 4.2],\n",
" [ 4. ],\n",
" [ 4.7],\n",
" [ 3.6],\n",
" [ 4.4],\n",
" [ 4.5],\n",
" [ 4.1],\n",
" [ 4.5],\n",
" [ 3.9],\n",
" [ 4.8],\n",
" [ 4. ],\n",
" [ 4.9],\n",
" [ 4.7],\n",
" [ 4.3],\n",
" [ 4.4],\n",
" [ 4.8],\n",
" [ 5. ],\n",
" [ 4.5],\n",
" [ 3.5],\n",
" [ 3.8],\n",
" [ 3.7],\n",
" [ 3.9],\n",
" [ 5.1],\n",
" [ 4.5],\n",
" [ 4.5],\n",
" [ 4.7],\n",
" [ 4.4],\n",
" [ 4.1],\n",
" [ 4. ],\n",
" [ 4.4],\n",
" [ 4.6],\n",
" [ 4. ],\n",
" [ 3.3],\n",
" [ 4.2],\n",
" [ 4.2],\n",
" [ 4.2],\n",
" [ 4.3],\n",
" [ 3. ],\n",
" [ 4.1],\n",
" [ 6. ],\n",
" [ 5.1],\n",
" [ 5.9],\n",
" [ 5.6],\n",
" [ 5.8],\n",
" [ 6.6],\n",
" [ 4.5],\n",
" [ 6.3],\n",
" [ 5.8],\n",
" [ 6.1],\n",
" [ 5.1],\n",
" [ 5.3],\n",
" [ 5.5],\n",
" [ 5. ],\n",
" [ 5.1],\n",
" [ 5.3],\n",
" [ 5.5],\n",
" [ 6.7],\n",
" [ 6.9],\n",
" [ 5. ],\n",
" [ 5.7],\n",
" [ 4.9],\n",
" [ 6.7],\n",
" [ 4.9],\n",
" [ 5.7],\n",
" [ 6. ],\n",
" [ 4.8],\n",
" [ 4.9],\n",
" [ 5.6],\n",
" [ 5.8],\n",
" [ 6.1],\n",
" [ 6.4],\n",
" [ 5.6],\n",
" [ 5.1],\n",
" [ 5.6],\n",
" [ 6.1],\n",
" [ 5.6],\n",
" [ 5.5],\n",
" [ 4.8],\n",
" [ 5.4],\n",
" [ 5.6],\n",
" [ 5.1],\n",
" [ 5.1],\n",
" [ 5.9],\n",
" [ 5.7],\n",
" [ 5.2],\n",
" [ 5. ],\n",
" [ 5.2],\n",
" [ 5.4],\n",
" [ 5.1]])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filter: 方差选择法\n",
"from sklearn.feature_selection import VarianceThreshold\n",
"VarianceThreshold(threshold=3).fit_transform(iris.data)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.4, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.7, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.5, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.1],\n",
" [ 1.1, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.3, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.7, 0.3],\n",
" [ 1.5, 0.3],\n",
" [ 1.7, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1. , 0.2],\n",
" [ 1.7, 0.5],\n",
" [ 1.9, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.4],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.5, 0.1],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.2],\n",
" [ 1.6, 0.6],\n",
" [ 1.9, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 4.7, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.9, 1.5],\n",
" [ 4. , 1.3],\n",
" [ 4.6, 1.5],\n",
" [ 4.5, 1.3],\n",
" [ 4.7, 1.6],\n",
" [ 3.3, 1. ],\n",
" [ 4.6, 1.3],\n",
" [ 3.9, 1.4],\n",
" [ 3.5, 1. ],\n",
" [ 4.2, 1.5],\n",
" [ 4. , 1. ],\n",
" [ 4.7, 1.4],\n",
" [ 3.6, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.1, 1. ],\n",
" [ 4.5, 1.5],\n",
" [ 3.9, 1.1],\n",
" [ 4.8, 1.8],\n",
" [ 4. , 1.3],\n",
" [ 4.9, 1.5],\n",
" [ 4.7, 1.2],\n",
" [ 4.3, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.8, 1.4],\n",
" [ 5. , 1.7],\n",
" [ 4.5, 1.5],\n",
" [ 3.5, 1. ],\n",
" [ 3.8, 1.1],\n",
" [ 3.7, 1. ],\n",
" [ 3.9, 1.2],\n",
" [ 5.1, 1.6],\n",
" [ 4.5, 1.5],\n",
" [ 4.5, 1.6],\n",
" [ 4.7, 1.5],\n",
" [ 4.4, 1.3],\n",
" [ 4.1, 1.3],\n",
" [ 4. , 1.3],\n",
" [ 4.4, 1.2],\n",
" [ 4.6, 1.4],\n",
" [ 4. , 1.2],\n",
" [ 3.3, 1. ],\n",
" [ 4.2, 1.3],\n",
" [ 4.2, 1.2],\n",
" [ 4.2, 1.3],\n",
" [ 4.3, 1.3],\n",
" [ 3. , 1.1],\n",
" [ 4.1, 1.3],\n",
" [ 6. , 2.5],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.1],\n",
" [ 5.6, 1.8],\n",
" [ 5.8, 2.2],\n",
" [ 6.6, 2.1],\n",
" [ 4.5, 1.7],\n",
" [ 6.3, 1.8],\n",
" [ 5.8, 1.8],\n",
" [ 6.1, 2.5],\n",
" [ 5.1, 2. ],\n",
" [ 5.3, 1.9],\n",
" [ 5.5, 2.1],\n",
" [ 5. , 2. ],\n",
" [ 5.1, 2.4],\n",
" [ 5.3, 2.3],\n",
" [ 5.5, 1.8],\n",
" [ 6.7, 2.2],\n",
" [ 6.9, 2.3],\n",
" [ 5. , 1.5],\n",
" [ 5.7, 2.3],\n",
" [ 4.9, 2. ],\n",
" [ 6.7, 2. ],\n",
" [ 4.9, 1.8],\n",
" [ 5.7, 2.1],\n",
" [ 6. , 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 4.9, 1.8],\n",
" [ 5.6, 2.1],\n",
" [ 5.8, 1.6],\n",
" [ 6.1, 1.9],\n",
" [ 6.4, 2. ],\n",
" [ 5.6, 2.2],\n",
" [ 5.1, 1.5],\n",
" [ 5.6, 1.4],\n",
" [ 6.1, 2.3],\n",
" [ 5.6, 2.4],\n",
" [ 5.5, 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 5.4, 2.1],\n",
" [ 5.6, 2.4],\n",
" [ 5.1, 2.3],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.3],\n",
" [ 5.7, 2.5],\n",
" [ 5.2, 2.3],\n",
" [ 5. , 1.9],\n",
" [ 5.2, 2. ],\n",
" [ 5.4, 2.3],\n",
" [ 5.1, 1.8]])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filter: 相关系数法\n",
"from sklearn.feature_selection import SelectKBest\n",
"from scipy.stats import pearsonr\n",
"SelectKBest(lambda X, Y: \\\n",
" tuple(map(tuple,np.array(list(map(lambda x:\\\n",
" pearsonr(x, Y), X.T))).T)), k=2)\\\n",
" .fit_transform(iris.data, iris.target)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.4, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.7, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.5, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.1],\n",
" [ 1.1, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.3, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.7, 0.3],\n",
" [ 1.5, 0.3],\n",
" [ 1.7, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1. , 0.2],\n",
" [ 1.7, 0.5],\n",
" [ 1.9, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.4],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.5, 0.1],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.2],\n",
" [ 1.6, 0.6],\n",
" [ 1.9, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 4.7, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.9, 1.5],\n",
" [ 4. , 1.3],\n",
" [ 4.6, 1.5],\n",
" [ 4.5, 1.3],\n",
" [ 4.7, 1.6],\n",
" [ 3.3, 1. ],\n",
" [ 4.6, 1.3],\n",
" [ 3.9, 1.4],\n",
" [ 3.5, 1. ],\n",
" [ 4.2, 1.5],\n",
" [ 4. , 1. ],\n",
" [ 4.7, 1.4],\n",
" [ 3.6, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.1, 1. ],\n",
" [ 4.5, 1.5],\n",
" [ 3.9, 1.1],\n",
" [ 4.8, 1.8],\n",
" [ 4. , 1.3],\n",
" [ 4.9, 1.5],\n",
" [ 4.7, 1.2],\n",
" [ 4.3, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.8, 1.4],\n",
" [ 5. , 1.7],\n",
" [ 4.5, 1.5],\n",
" [ 3.5, 1. ],\n",
" [ 3.8, 1.1],\n",
" [ 3.7, 1. ],\n",
" [ 3.9, 1.2],\n",
" [ 5.1, 1.6],\n",
" [ 4.5, 1.5],\n",
" [ 4.5, 1.6],\n",
" [ 4.7, 1.5],\n",
" [ 4.4, 1.3],\n",
" [ 4.1, 1.3],\n",
" [ 4. , 1.3],\n",
" [ 4.4, 1.2],\n",
" [ 4.6, 1.4],\n",
" [ 4. , 1.2],\n",
" [ 3.3, 1. ],\n",
" [ 4.2, 1.3],\n",
" [ 4.2, 1.2],\n",
" [ 4.2, 1.3],\n",
" [ 4.3, 1.3],\n",
" [ 3. , 1.1],\n",
" [ 4.1, 1.3],\n",
" [ 6. , 2.5],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.1],\n",
" [ 5.6, 1.8],\n",
" [ 5.8, 2.2],\n",
" [ 6.6, 2.1],\n",
" [ 4.5, 1.7],\n",
" [ 6.3, 1.8],\n",
" [ 5.8, 1.8],\n",
" [ 6.1, 2.5],\n",
" [ 5.1, 2. ],\n",
" [ 5.3, 1.9],\n",
" [ 5.5, 2.1],\n",
" [ 5. , 2. ],\n",
" [ 5.1, 2.4],\n",
" [ 5.3, 2.3],\n",
" [ 5.5, 1.8],\n",
" [ 6.7, 2.2],\n",
" [ 6.9, 2.3],\n",
" [ 5. , 1.5],\n",
" [ 5.7, 2.3],\n",
" [ 4.9, 2. ],\n",
" [ 6.7, 2. ],\n",
" [ 4.9, 1.8],\n",
" [ 5.7, 2.1],\n",
" [ 6. , 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 4.9, 1.8],\n",
" [ 5.6, 2.1],\n",
" [ 5.8, 1.6],\n",
" [ 6.1, 1.9],\n",
" [ 6.4, 2. ],\n",
" [ 5.6, 2.2],\n",
" [ 5.1, 1.5],\n",
" [ 5.6, 1.4],\n",
" [ 6.1, 2.3],\n",
" [ 5.6, 2.4],\n",
" [ 5.5, 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 5.4, 2.1],\n",
" [ 5.6, 2.4],\n",
" [ 5.1, 2.3],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.3],\n",
" [ 5.7, 2.5],\n",
" [ 5.2, 2.3],\n",
" [ 5. , 1.9],\n",
" [ 5.2, 2. ],\n",
" [ 5.4, 2.3],\n",
" [ 5.1, 1.8]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filter: 卡方检验\n",
"from sklearn.feature_selection import SelectKBest\n",
"from sklearn.feature_selection import chi2\n",
"\n",
"SelectKBest(chi2, k=2).fit_transform(iris.data, iris.target)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Filter: 互信息法\n",
"# from sklearn.feature_selection import SelectKBest\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3.5, 0.2],\n",
" [ 3. , 0.2],\n",
" [ 3.2, 0.2],\n",
" [ 3.1, 0.2],\n",
" [ 3.6, 0.2],\n",
" [ 3.9, 0.4],\n",
" [ 3.4, 0.3],\n",
" [ 3.4, 0.2],\n",
" [ 2.9, 0.2],\n",
" [ 3.1, 0.1],\n",
" [ 3.7, 0.2],\n",
" [ 3.4, 0.2],\n",
" [ 3. , 0.1],\n",
" [ 3. , 0.1],\n",
" [ 4. , 0.2],\n",
" [ 4.4, 0.4],\n",
" [ 3.9, 0.4],\n",
" [ 3.5, 0.3],\n",
" [ 3.8, 0.3],\n",
" [ 3.8, 0.3],\n",
" [ 3.4, 0.2],\n",
" [ 3.7, 0.4],\n",
" [ 3.6, 0.2],\n",
" [ 3.3, 0.5],\n",
" [ 3.4, 0.2],\n",
" [ 3. , 0.2],\n",
" [ 3.4, 0.4],\n",
" [ 3.5, 0.2],\n",
" [ 3.4, 0.2],\n",
" [ 3.2, 0.2],\n",
" [ 3.1, 0.2],\n",
" [ 3.4, 0.4],\n",
" [ 4.1, 0.1],\n",
" [ 4.2, 0.2],\n",
" [ 3.1, 0.1],\n",
" [ 3.2, 0.2],\n",
" [ 3.5, 0.2],\n",
" [ 3.1, 0.1],\n",
" [ 3. , 0.2],\n",
" [ 3.4, 0.2],\n",
" [ 3.5, 0.3],\n",
" [ 2.3, 0.3],\n",
" [ 3.2, 0.2],\n",
" [ 3.5, 0.6],\n",
" [ 3.8, 0.4],\n",
" [ 3. , 0.3],\n",
" [ 3.8, 0.2],\n",
" [ 3.2, 0.2],\n",
" [ 3.7, 0.2],\n",
" [ 3.3, 0.2],\n",
" [ 3.2, 1.4],\n",
" [ 3.2, 1.5],\n",
" [ 3.1, 1.5],\n",
" [ 2.3, 1.3],\n",
" [ 2.8, 1.5],\n",
" [ 2.8, 1.3],\n",
" [ 3.3, 1.6],\n",
" [ 2.4, 1. ],\n",
" [ 2.9, 1.3],\n",
" [ 2.7, 1.4],\n",
" [ 2. , 1. ],\n",
" [ 3. , 1.5],\n",
" [ 2.2, 1. ],\n",
" [ 2.9, 1.4],\n",
" [ 2.9, 1.3],\n",
" [ 3.1, 1.4],\n",
" [ 3. , 1.5],\n",
" [ 2.7, 1. ],\n",
" [ 2.2, 1.5],\n",
" [ 2.5, 1.1],\n",
" [ 3.2, 1.8],\n",
" [ 2.8, 1.3],\n",
" [ 2.5, 1.5],\n",
" [ 2.8, 1.2],\n",
" [ 2.9, 1.3],\n",
" [ 3. , 1.4],\n",
" [ 2.8, 1.4],\n",
" [ 3. , 1.7],\n",
" [ 2.9, 1.5],\n",
" [ 2.6, 1. ],\n",
" [ 2.4, 1.1],\n",
" [ 2.4, 1. ],\n",
" [ 2.7, 1.2],\n",
" [ 2.7, 1.6],\n",
" [ 3. , 1.5],\n",
" [ 3.4, 1.6],\n",
" [ 3.1, 1.5],\n",
" [ 2.3, 1.3],\n",
" [ 3. , 1.3],\n",
" [ 2.5, 1.3],\n",
" [ 2.6, 1.2],\n",
" [ 3. , 1.4],\n",
" [ 2.6, 1.2],\n",
" [ 2.3, 1. ],\n",
" [ 2.7, 1.3],\n",
" [ 3. , 1.2],\n",
" [ 2.9, 1.3],\n",
" [ 2.9, 1.3],\n",
" [ 2.5, 1.1],\n",
" [ 2.8, 1.3],\n",
" [ 3.3, 2.5],\n",
" [ 2.7, 1.9],\n",
" [ 3. , 2.1],\n",
" [ 2.9, 1.8],\n",
" [ 3. , 2.2],\n",
" [ 3. , 2.1],\n",
" [ 2.5, 1.7],\n",
" [ 2.9, 1.8],\n",
" [ 2.5, 1.8],\n",
" [ 3.6, 2.5],\n",
" [ 3.2, 2. ],\n",
" [ 2.7, 1.9],\n",
" [ 3. , 2.1],\n",
" [ 2.5, 2. ],\n",
" [ 2.8, 2.4],\n",
" [ 3.2, 2.3],\n",
" [ 3. , 1.8],\n",
" [ 3.8, 2.2],\n",
" [ 2.6, 2.3],\n",
" [ 2.2, 1.5],\n",
" [ 3.2, 2.3],\n",
" [ 2.8, 2. ],\n",
" [ 2.8, 2. ],\n",
" [ 2.7, 1.8],\n",
" [ 3.3, 2.1],\n",
" [ 3.2, 1.8],\n",
" [ 2.8, 1.8],\n",
" [ 3. , 1.8],\n",
" [ 2.8, 2.1],\n",
" [ 3. , 1.6],\n",
" [ 2.8, 1.9],\n",
" [ 3.8, 2. ],\n",
" [ 2.8, 2.2],\n",
" [ 2.8, 1.5],\n",
" [ 2.6, 1.4],\n",
" [ 3. , 2.3],\n",
" [ 3.4, 2.4],\n",
" [ 3.1, 1.8],\n",
" [ 3. , 1.8],\n",
" [ 3.1, 2.1],\n",
" [ 3.1, 2.4],\n",
" [ 3.1, 2.3],\n",
" [ 2.7, 1.9],\n",
" [ 3.2, 2.3],\n",
" [ 3.3, 2.5],\n",
" [ 3. , 2.3],\n",
" [ 2.5, 1.9],\n",
" [ 3. , 2. ],\n",
" [ 3.4, 2.3],\n",
" [ 3. , 1.8]])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Warpper: 递归特征消除\n",
"from sklearn.feature_selection import RFE\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"RFE(estimator=LogisticRegression(), n_features_to_select=2)\\\n",
" .fit_transform(iris.data,iris.target)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Embedded\n",
"使用带惩罚项的基模型,除了筛选出特征外,同时也进行了降维。\n",
"\n",
"基于L1惩罚项,L1惩罚项降维的原理在于保留多个对目标值具有同等相关性的特征中的一个,所以没选到的特征不代表不重要。\n",
"\n",
"结合L2惩罚项来优化,具体操作为:\n",
"若一个特征在L1中的权值为1,选择在L2中权值差别不大且在L1中权值为0的特征构成同类集合,将这一集合中的特征平分L1中的权值.\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 5.1, 3.5, 1.4],\n",
" [ 4.9, 3. , 1.4],\n",
" [ 4.7, 3.2, 1.3],\n",
" [ 4.6, 3.1, 1.5],\n",
" [ 5. , 3.6, 1.4],\n",
" [ 5.4, 3.9, 1.7],\n",
" [ 4.6, 3.4, 1.4],\n",
" [ 5. , 3.4, 1.5],\n",
" [ 4.4, 2.9, 1.4],\n",
" [ 4.9, 3.1, 1.5],\n",
" [ 5.4, 3.7, 1.5],\n",
" [ 4.8, 3.4, 1.6],\n",
" [ 4.8, 3. , 1.4],\n",
" [ 4.3, 3. , 1.1],\n",
" [ 5.8, 4. , 1.2],\n",
" [ 5.7, 4.4, 1.5],\n",
" [ 5.4, 3.9, 1.3],\n",
" [ 5.1, 3.5, 1.4],\n",
" [ 5.7, 3.8, 1.7],\n",
" [ 5.1, 3.8, 1.5],\n",
" [ 5.4, 3.4, 1.7],\n",
" [ 5.1, 3.7, 1.5],\n",
" [ 4.6, 3.6, 1. ],\n",
" [ 5.1, 3.3, 1.7],\n",
" [ 4.8, 3.4, 1.9],\n",
" [ 5. , 3. , 1.6],\n",
" [ 5. , 3.4, 1.6],\n",
" [ 5.2, 3.5, 1.5],\n",
" [ 5.2, 3.4, 1.4],\n",
" [ 4.7, 3.2, 1.6],\n",
" [ 4.8, 3.1, 1.6],\n",
" [ 5.4, 3.4, 1.5],\n",
" [ 5.2, 4.1, 1.5],\n",
" [ 5.5, 4.2, 1.4],\n",
" [ 4.9, 3.1, 1.5],\n",
" [ 5. , 3.2, 1.2],\n",
" [ 5.5, 3.5, 1.3],\n",
" [ 4.9, 3.1, 1.5],\n",
" [ 4.4, 3. , 1.3],\n",
" [ 5.1, 3.4, 1.5],\n",
" [ 5. , 3.5, 1.3],\n",
" [ 4.5, 2.3, 1.3],\n",
" [ 4.4, 3.2, 1.3],\n",
" [ 5. , 3.5, 1.6],\n",
" [ 5.1, 3.8, 1.9],\n",
" [ 4.8, 3. , 1.4],\n",
" [ 5.1, 3.8, 1.6],\n",
" [ 4.6, 3.2, 1.4],\n",
" [ 5.3, 3.7, 1.5],\n",
" [ 5. , 3.3, 1.4],\n",
" [ 7. , 3.2, 4.7],\n",
" [ 6.4, 3.2, 4.5],\n",
" [ 6.9, 3.1, 4.9],\n",
" [ 5.5, 2.3, 4. ],\n",
" [ 6.5, 2.8, 4.6],\n",
" [ 5.7, 2.8, 4.5],\n",
" [ 6.3, 3.3, 4.7],\n",
" [ 4.9, 2.4, 3.3],\n",
" [ 6.6, 2.9, 4.6],\n",
" [ 5.2, 2.7, 3.9],\n",
" [ 5. , 2. , 3.5],\n",
" [ 5.9, 3. , 4.2],\n",
" [ 6. , 2.2, 4. ],\n",
" [ 6.1, 2.9, 4.7],\n",
" [ 5.6, 2.9, 3.6],\n",
" [ 6.7, 3.1, 4.4],\n",
" [ 5.6, 3. , 4.5],\n",
" [ 5.8, 2.7, 4.1],\n",
" [ 6.2, 2.2, 4.5],\n",
" [ 5.6, 2.5, 3.9],\n",
" [ 5.9, 3.2, 4.8],\n",
" [ 6.1, 2.8, 4. ],\n",
" [ 6.3, 2.5, 4.9],\n",
" [ 6.1, 2.8, 4.7],\n",
" [ 6.4, 2.9, 4.3],\n",
" [ 6.6, 3. , 4.4],\n",
" [ 6.8, 2.8, 4.8],\n",
" [ 6.7, 3. , 5. ],\n",
" [ 6. , 2.9, 4.5],\n",
" [ 5.7, 2.6, 3.5],\n",
" [ 5.5, 2.4, 3.8],\n",
" [ 5.5, 2.4, 3.7],\n",
" [ 5.8, 2.7, 3.9],\n",
" [ 6. , 2.7, 5.1],\n",
" [ 5.4, 3. , 4.5],\n",
" [ 6. , 3.4, 4.5],\n",
" [ 6.7, 3.1, 4.7],\n",
" [ 6.3, 2.3, 4.4],\n",
" [ 5.6, 3. , 4.1],\n",
" [ 5.5, 2.5, 4. ],\n",
" [ 5.5, 2.6, 4.4],\n",
" [ 6.1, 3. , 4.6],\n",
" [ 5.8, 2.6, 4. ],\n",
" [ 5. , 2.3, 3.3],\n",
" [ 5.6, 2.7, 4.2],\n",
" [ 5.7, 3. , 4.2],\n",
" [ 5.7, 2.9, 4.2],\n",
" [ 6.2, 2.9, 4.3],\n",
" [ 5.1, 2.5, 3. ],\n",
" [ 5.7, 2.8, 4.1],\n",
" [ 6.3, 3.3, 6. ],\n",
" [ 5.8, 2.7, 5.1],\n",
" [ 7.1, 3. , 5.9],\n",
" [ 6.3, 2.9, 5.6],\n",
" [ 6.5, 3. , 5.8],\n",
" [ 7.6, 3. , 6.6],\n",
" [ 4.9, 2.5, 4.5],\n",
" [ 7.3, 2.9, 6.3],\n",
" [ 6.7, 2.5, 5.8],\n",
" [ 7.2, 3.6, 6.1],\n",
" [ 6.5, 3.2, 5.1],\n",
" [ 6.4, 2.7, 5.3],\n",
" [ 6.8, 3. , 5.5],\n",
" [ 5.7, 2.5, 5. ],\n",
" [ 5.8, 2.8, 5.1],\n",
" [ 6.4, 3.2, 5.3],\n",
" [ 6.5, 3. , 5.5],\n",
" [ 7.7, 3.8, 6.7],\n",
" [ 7.7, 2.6, 6.9],\n",
" [ 6. , 2.2, 5. ],\n",
" [ 6.9, 3.2, 5.7],\n",
" [ 5.6, 2.8, 4.9],\n",
" [ 7.7, 2.8, 6.7],\n",
" [ 6.3, 2.7, 4.9],\n",
" [ 6.7, 3.3, 5.7],\n",
" [ 7.2, 3.2, 6. ],\n",
" [ 6.2, 2.8, 4.8],\n",
" [ 6.1, 3. , 4.9],\n",
" [ 6.4, 2.8, 5.6],\n",
" [ 7.2, 3. , 5.8],\n",
" [ 7.4, 2.8, 6.1],\n",
" [ 7.9, 3.8, 6.4],\n",
" [ 6.4, 2.8, 5.6],\n",
" [ 6.3, 2.8, 5.1],\n",
" [ 6.1, 2.6, 5.6],\n",
" [ 7.7, 3. , 6.1],\n",
" [ 6.3, 3.4, 5.6],\n",
" [ 6.4, 3.1, 5.5],\n",
" [ 6. , 3. , 4.8],\n",
" [ 6.9, 3.1, 5.4],\n",
" [ 6.7, 3.1, 5.6],\n",
" [ 6.9, 3.1, 5.1],\n",
" [ 5.8, 2.7, 5.1],\n",
" [ 6.8, 3.2, 5.9],\n",
" [ 6.7, 3.3, 5.7],\n",
" [ 6.7, 3. , 5.2],\n",
" [ 6.3, 2.5, 5. ],\n",
" [ 6.5, 3. , 5.2],\n",
" [ 6.2, 3.4, 5.4],\n",
" [ 5.9, 3. , 5.1]])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embedded: feature selection with an L1-penalised base model\n",
"from sklearn.feature_selection import SelectFromModel\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# penalty='l1' needs a solver that supports it. The solver is pinned to\n",
"# 'liblinear' (the historical default) because the default has been 'lbfgs'\n",
"# since scikit-learn 0.22, and 'lbfgs' raises a ValueError for an L1 penalty.\n",
"SelectFromModel(LogisticRegression(penalty='l1', C=0.1, solver='liblinear'))\\\n",
"    .fit_transform(iris.data, iris.target)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.4, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.7, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.5, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.1],\n",
" [ 1.1, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.3, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.7, 0.3],\n",
" [ 1.5, 0.3],\n",
" [ 1.7, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1. , 0.2],\n",
" [ 1.7, 0.5],\n",
" [ 1.9, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.4],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.5, 0.1],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.2],\n",
" [ 1.6, 0.6],\n",
" [ 1.9, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 4.7, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.9, 1.5],\n",
" [ 4. , 1.3],\n",
" [ 4.6, 1.5],\n",
" [ 4.5, 1.3],\n",
" [ 4.7, 1.6],\n",
" [ 3.3, 1. ],\n",
" [ 4.6, 1.3],\n",
" [ 3.9, 1.4],\n",
" [ 3.5, 1. ],\n",
" [ 4.2, 1.5],\n",
" [ 4. , 1. ],\n",
" [ 4.7, 1.4],\n",
" [ 3.6, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.1, 1. ],\n",
" [ 4.5, 1.5],\n",
" [ 3.9, 1.1],\n",
" [ 4.8, 1.8],\n",
" [ 4. , 1.3],\n",
" [ 4.9, 1.5],\n",
" [ 4.7, 1.2],\n",
" [ 4.3, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.8, 1.4],\n",
" [ 5. , 1.7],\n",
" [ 4.5, 1.5],\n",
" [ 3.5, 1. ],\n",
" [ 3.8, 1.1],\n",
" [ 3.7, 1. ],\n",
" [ 3.9, 1.2],\n",
" [ 5.1, 1.6],\n",
" [ 4.5, 1.5],\n",
" [ 4.5, 1.6],\n",
" [ 4.7, 1.5],\n",
" [ 4.4, 1.3],\n",
" [ 4.1, 1.3],\n",
" [ 4. , 1.3],\n",
" [ 4.4, 1.2],\n",
" [ 4.6, 1.4],\n",
" [ 4. , 1.2],\n",
" [ 3.3, 1. ],\n",
" [ 4.2, 1.3],\n",
" [ 4.2, 1.2],\n",
" [ 4.2, 1.3],\n",
" [ 4.3, 1.3],\n",
" [ 3. , 1.1],\n",
" [ 4.1, 1.3],\n",
" [ 6. , 2.5],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.1],\n",
" [ 5.6, 1.8],\n",
" [ 5.8, 2.2],\n",
" [ 6.6, 2.1],\n",
" [ 4.5, 1.7],\n",
" [ 6.3, 1.8],\n",
" [ 5.8, 1.8],\n",
" [ 6.1, 2.5],\n",
" [ 5.1, 2. ],\n",
" [ 5.3, 1.9],\n",
" [ 5.5, 2.1],\n",
" [ 5. , 2. ],\n",
" [ 5.1, 2.4],\n",
" [ 5.3, 2.3],\n",
" [ 5.5, 1.8],\n",
" [ 6.7, 2.2],\n",
" [ 6.9, 2.3],\n",
" [ 5. , 1.5],\n",
" [ 5.7, 2.3],\n",
" [ 4.9, 2. ],\n",
" [ 6.7, 2. ],\n",
" [ 4.9, 1.8],\n",
" [ 5.7, 2.1],\n",
" [ 6. , 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 4.9, 1.8],\n",
" [ 5.6, 2.1],\n",
" [ 5.8, 1.6],\n",
" [ 6.1, 1.9],\n",
" [ 6.4, 2. ],\n",
" [ 5.6, 2.2],\n",
" [ 5.1, 1.5],\n",
" [ 5.6, 1.4],\n",
" [ 6.1, 2.3],\n",
" [ 5.6, 2.4],\n",
" [ 5.5, 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 5.4, 2.1],\n",
" [ 5.6, 2.4],\n",
" [ 5.1, 2.3],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.3],\n",
" [ 5.7, 2.5],\n",
" [ 5.2, 2.3],\n",
" [ 5. , 1.9],\n",
" [ 5.2, 2. ],\n",
" [ 5.4, 2.3],\n",
" [ 5.1, 1.8]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embedded: feature selection based on a tree model\n",
"from sklearn.feature_selection import SelectFromModel\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"\n",
"# Seed the estimator so the fitted feature importances, and therefore the\n",
"# selected columns, are the same on every Restart & Run All.\n",
"SelectFromModel(GradientBoostingClassifier(random_state=0))\\\n",
"    .fit_transform(iris.data, iris.target)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0.07817179 0.03503159 0.38008987 0.50670675]\n"
]
},
{
"data": {
"text/plain": [
"array([[ 1.4, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.7, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.5, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.1],\n",
" [ 1.1, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.3, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.7, 0.3],\n",
" [ 1.5, 0.3],\n",
" [ 1.7, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1. , 0.2],\n",
" [ 1.7, 0.5],\n",
" [ 1.9, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.4],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.6, 0.2],\n",
" [ 1.5, 0.4],\n",
" [ 1.5, 0.1],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.2, 0.2],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.1],\n",
" [ 1.3, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.3],\n",
" [ 1.3, 0.2],\n",
" [ 1.6, 0.6],\n",
" [ 1.9, 0.4],\n",
" [ 1.4, 0.3],\n",
" [ 1.6, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 1.5, 0.2],\n",
" [ 1.4, 0.2],\n",
" [ 4.7, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.9, 1.5],\n",
" [ 4. , 1.3],\n",
" [ 4.6, 1.5],\n",
" [ 4.5, 1.3],\n",
" [ 4.7, 1.6],\n",
" [ 3.3, 1. ],\n",
" [ 4.6, 1.3],\n",
" [ 3.9, 1.4],\n",
" [ 3.5, 1. ],\n",
" [ 4.2, 1.5],\n",
" [ 4. , 1. ],\n",
" [ 4.7, 1.4],\n",
" [ 3.6, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.5, 1.5],\n",
" [ 4.1, 1. ],\n",
" [ 4.5, 1.5],\n",
" [ 3.9, 1.1],\n",
" [ 4.8, 1.8],\n",
" [ 4. , 1.3],\n",
" [ 4.9, 1.5],\n",
" [ 4.7, 1.2],\n",
" [ 4.3, 1.3],\n",
" [ 4.4, 1.4],\n",
" [ 4.8, 1.4],\n",
" [ 5. , 1.7],\n",
" [ 4.5, 1.5],\n",
" [ 3.5, 1. ],\n",
" [ 3.8, 1.1],\n",
" [ 3.7, 1. ],\n",
" [ 3.9, 1.2],\n",
" [ 5.1, 1.6],\n",
" [ 4.5, 1.5],\n",
" [ 4.5, 1.6],\n",
" [ 4.7, 1.5],\n",
" [ 4.4, 1.3],\n",
" [ 4.1, 1.3],\n",
" [ 4. , 1.3],\n",
" [ 4.4, 1.2],\n",
" [ 4.6, 1.4],\n",
" [ 4. , 1.2],\n",
" [ 3.3, 1. ],\n",
" [ 4.2, 1.3],\n",
" [ 4.2, 1.2],\n",
" [ 4.2, 1.3],\n",
" [ 4.3, 1.3],\n",
" [ 3. , 1.1],\n",
" [ 4.1, 1.3],\n",
" [ 6. , 2.5],\n",
" [ 5.1, 1.9],\n",
" [ 5.9, 2.1],\n",
" [ 5.6, 1.8],\n",
" [ 5.8, 2.2],\n",
" [ 6.6, 2.1],\n",
" [ 4.5, 1.7],\n",
" [ 6.3, 1.8],\n",
" [ 5.8, 1.8],\n",
" [ 6.1, 2.5],\n",
" [ 5.1, 2. ],\n",
" [ 5.3, 1.9],\n",
" [ 5.5, 2.1],\n",
" [ 5. , 2. ],\n",
" [ 5.1, 2.4],\n",
" [ 5.3, 2.3],\n",
" [ 5.5, 1.8],\n",
" [ 6.7, 2.2],\n",
" [ 6.9, 2.3],\n",
" [ 5. , 1.5],\n",
" [ 5.7, 2.3],\n",
" [ 4.9, 2. ],\n",
" [ 6.7, 2. ],\n",
" [ 4.9, 1.8],\n",
" [ 5.7, 2.1],\n",
" [ 6. , 1.8],\n",
" [ 4.8, 1.8],\n",
" [ 4.9, 1.8],\n",
" [ 5.6, 2.1],\n",
" [ 5.8, 1.6],\n",
" [ 6.1, 1.9],\n",
" [ 6.4, 2. ],\n",
" [ 5.6, 2.2],\n",
" [ 5.1, 1.5],\n",
" [ 5.6, 1.4],\n",