Created
July 18, 2016 11:38
-
-
Save anirudhjayaraman/b50392bfe155fa1118437a1b778806e2 to your computer and use it in GitHub Desktop.
KNN algorithm implemented with scikit learn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### k-Nearest Neighbors in scikit-learn" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# getting our data playground ready\n", | |
"from sklearn import datasets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# getting KNN related artillery ready\n", | |
"from sklearn import neighbors" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"target_names\n", | |
"['setosa' 'versicolor' 'virginica']\n", | |
"\n", | |
"data\n", | |
"[[ 5.1 3.5 1.4 0.2]\n", | |
" [ 4.9 3. 1.4 0.2]\n", | |
" [ 4.7 3.2 1.3 0.2]\n", | |
" [ 4.6 3.1 1.5 0.2]\n", | |
" [ 5. 3.6 1.4 0.2]\n", | |
" [ 5.4 3.9 1.7 0.4]\n", | |
" [ 4.6 3.4 1.4 0.3]\n", | |
" [ 5. 3.4 1.5 0.2]\n", | |
" [ 4.4 2.9 1.4 0.2]\n", | |
" [ 4.9 3.1 1.5 0.1]\n", | |
" [ 5.4 3.7 1.5 0.2]\n", | |
" [ 4.8 3.4 1.6 0.2]\n", | |
" [ 4.8 3. 1.4 0.1]\n", | |
" [ 4.3 3. 1.1 0.1]\n", | |
" [ 5.8 4. 1.2 0.2]\n", | |
" [ 5.7 4.4 1.5 0.4]\n", | |
" [ 5.4 3.9 1.3 0.4]\n", | |
" [ 5.1 3.5 1.4 0.3]\n", | |
" [ 5.7 3.8 1.7 0.3]\n", | |
" [ 5.1 3.8 1.5 0.3]\n", | |
" [ 5.4 3.4 1.7 0.2]\n", | |
" [ 5.1 3.7 1.5 0.4]\n", | |
" [ 4.6 3.6 1. 0.2]\n", | |
" [ 5.1 3.3 1.7 0.5]\n", | |
" [ 4.8 3.4 1.9 0.2]\n", | |
" [ 5. 3. 1.6 0.2]\n", | |
" [ 5. 3.4 1.6 0.4]\n", | |
" [ 5.2 3.5 1.5 0.2]\n", | |
" [ 5.2 3.4 1.4 0.2]\n", | |
" [ 4.7 3.2 1.6 0.2]\n", | |
" [ 4.8 3.1 1.6 0.2]\n", | |
" [ 5.4 3.4 1.5 0.4]\n", | |
" [ 5.2 4.1 1.5 0.1]\n", | |
" [ 5.5 4.2 1.4 0.2]\n", | |
" [ 4.9 3.1 1.5 0.1]\n", | |
" [ 5. 3.2 1.2 0.2]\n", | |
" [ 5.5 3.5 1.3 0.2]\n", | |
" [ 4.9 3.1 1.5 0.1]\n", | |
" [ 4.4 3. 1.3 0.2]\n", | |
" [ 5.1 3.4 1.5 0.2]\n", | |
" [ 5. 3.5 1.3 0.3]\n", | |
" [ 4.5 2.3 1.3 0.3]\n", | |
" [ 4.4 3.2 1.3 0.2]\n", | |
" [ 5. 3.5 1.6 0.6]\n", | |
" [ 5.1 3.8 1.9 0.4]\n", | |
" [ 4.8 3. 1.4 0.3]\n", | |
" [ 5.1 3.8 1.6 0.2]\n", | |
" [ 4.6 3.2 1.4 0.2]\n", | |
" [ 5.3 3.7 1.5 0.2]\n", | |
" [ 5. 3.3 1.4 0.2]\n", | |
" [ 7. 3.2 4.7 1.4]\n", | |
" [ 6.4 3.2 4.5 1.5]\n", | |
" [ 6.9 3.1 4.9 1.5]\n", | |
" [ 5.5 2.3 4. 1.3]\n", | |
" [ 6.5 2.8 4.6 1.5]\n", | |
" [ 5.7 2.8 4.5 1.3]\n", | |
" [ 6.3 3.3 4.7 1.6]\n", | |
" [ 4.9 2.4 3.3 1. ]\n", | |
" [ 6.6 2.9 4.6 1.3]\n", | |
" [ 5.2 2.7 3.9 1.4]\n", | |
" [ 5. 2. 3.5 1. ]\n", | |
" [ 5.9 3. 4.2 1.5]\n", | |
" [ 6. 2.2 4. 1. ]\n", | |
" [ 6.1 2.9 4.7 1.4]\n", | |
" [ 5.6 2.9 3.6 1.3]\n", | |
" [ 6.7 3.1 4.4 1.4]\n", | |
" [ 5.6 3. 4.5 1.5]\n", | |
" [ 5.8 2.7 4.1 1. ]\n", | |
" [ 6.2 2.2 4.5 1.5]\n", | |
" [ 5.6 2.5 3.9 1.1]\n", | |
" [ 5.9 3.2 4.8 1.8]\n", | |
" [ 6.1 2.8 4. 1.3]\n", | |
" [ 6.3 2.5 4.9 1.5]\n", | |
" [ 6.1 2.8 4.7 1.2]\n", | |
" [ 6.4 2.9 4.3 1.3]\n", | |
" [ 6.6 3. 4.4 1.4]\n", | |
" [ 6.8 2.8 4.8 1.4]\n", | |
" [ 6.7 3. 5. 1.7]\n", | |
" [ 6. 2.9 4.5 1.5]\n", | |
" [ 5.7 2.6 3.5 1. ]\n", | |
" [ 5.5 2.4 3.8 1.1]\n", | |
" [ 5.5 2.4 3.7 1. ]\n", | |
" [ 5.8 2.7 3.9 1.2]\n", | |
" [ 6. 2.7 5.1 1.6]\n", | |
" [ 5.4 3. 4.5 1.5]\n", | |
" [ 6. 3.4 4.5 1.6]\n", | |
" [ 6.7 3.1 4.7 1.5]\n", | |
" [ 6.3 2.3 4.4 1.3]\n", | |
" [ 5.6 3. 4.1 1.3]\n", | |
" [ 5.5 2.5 4. 1.3]\n", | |
" [ 5.5 2.6 4.4 1.2]\n", | |
" [ 6.1 3. 4.6 1.4]\n", | |
" [ 5.8 2.6 4. 1.2]\n", | |
" [ 5. 2.3 3.3 1. ]\n", | |
" [ 5.6 2.7 4.2 1.3]\n", | |
" [ 5.7 3. 4.2 1.2]\n", | |
" [ 5.7 2.9 4.2 1.3]\n", | |
" [ 6.2 2.9 4.3 1.3]\n", | |
" [ 5.1 2.5 3. 1.1]\n", | |
" [ 5.7 2.8 4.1 1.3]\n", | |
" [ 6.3 3.3 6. 2.5]\n", | |
" [ 5.8 2.7 5.1 1.9]\n", | |
" [ 7.1 3. 5.9 2.1]\n", | |
" [ 6.3 2.9 5.6 1.8]\n", | |
" [ 6.5 3. 5.8 2.2]\n", | |
" [ 7.6 3. 6.6 2.1]\n", | |
" [ 4.9 2.5 4.5 1.7]\n", | |
" [ 7.3 2.9 6.3 1.8]\n", | |
" [ 6.7 2.5 5.8 1.8]\n", | |
" [ 7.2 3.6 6.1 2.5]\n", | |
" [ 6.5 3.2 5.1 2. ]\n", | |
" [ 6.4 2.7 5.3 1.9]\n", | |
" [ 6.8 3. 5.5 2.1]\n", | |
" [ 5.7 2.5 5. 2. ]\n", | |
" [ 5.8 2.8 5.1 2.4]\n", | |
" [ 6.4 3.2 5.3 2.3]\n", | |
" [ 6.5 3. 5.5 1.8]\n", | |
" [ 7.7 3.8 6.7 2.2]\n", | |
" [ 7.7 2.6 6.9 2.3]\n", | |
" [ 6. 2.2 5. 1.5]\n", | |
" [ 6.9 3.2 5.7 2.3]\n", | |
" [ 5.6 2.8 4.9 2. ]\n", | |
" [ 7.7 2.8 6.7 2. ]\n", | |
" [ 6.3 2.7 4.9 1.8]\n", | |
" [ 6.7 3.3 5.7 2.1]\n", | |
" [ 7.2 3.2 6. 1.8]\n", | |
" [ 6.2 2.8 4.8 1.8]\n", | |
" [ 6.1 3. 4.9 1.8]\n", | |
" [ 6.4 2.8 5.6 2.1]\n", | |
" [ 7.2 3. 5.8 1.6]\n", | |
" [ 7.4 2.8 6.1 1.9]\n", | |
" [ 7.9 3.8 6.4 2. ]\n", | |
" [ 6.4 2.8 5.6 2.2]\n", | |
" [ 6.3 2.8 5.1 1.5]\n", | |
" [ 6.1 2.6 5.6 1.4]\n", | |
" [ 7.7 3. 6.1 2.3]\n", | |
" [ 6.3 3.4 5.6 2.4]\n", | |
" [ 6.4 3.1 5.5 1.8]\n", | |
" [ 6. 3. 4.8 1.8]\n", | |
" [ 6.9 3.1 5.4 2.1]\n", | |
" [ 6.7 3.1 5.6 2.4]\n", | |
" [ 6.9 3.1 5.1 2.3]\n", | |
" [ 5.8 2.7 5.1 1.9]\n", | |
" [ 6.8 3.2 5.9 2.3]\n", | |
" [ 6.7 3.3 5.7 2.5]\n", | |
" [ 6.7 3. 5.2 2.3]\n", | |
" [ 6.3 2.5 5. 1.9]\n", | |
" [ 6.5 3. 5.2 2. ]\n", | |
" [ 6.2 3.4 5.4 2.3]\n", | |
" [ 5.9 3. 5.1 1.8]]\n", | |
"\n", | |
"target\n", | |
"[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", | |
" 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", | |
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n", | |
" 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n", | |
" 2 2]\n", | |
"\n", | |
"DESCR\n", | |
"Iris Plants Database\n", | |
"\n", | |
"Notes\n", | |
"-----\n", | |
"Data Set Characteristics:\n", | |
" :Number of Instances: 150 (50 in each of three classes)\n", | |
" :Number of Attributes: 4 numeric, predictive attributes and the class\n", | |
" :Attribute Information:\n", | |
" - sepal length in cm\n", | |
" - sepal width in cm\n", | |
" - petal length in cm\n", | |
" - petal width in cm\n", | |
" - class:\n", | |
" - Iris-Setosa\n", | |
" - Iris-Versicolour\n", | |
" - Iris-Virginica\n", | |
" :Summary Statistics:\n", | |
" ============== ==== ==== ======= ===== ====================\n", | |
" Min Max Mean SD Class Correlation\n", | |
" ============== ==== ==== ======= ===== ====================\n", | |
" sepal length: 4.3 7.9 5.84 0.83 0.7826\n", | |
" sepal width: 2.0 4.4 3.05 0.43 -0.4194\n", | |
" petal length: 1.0 6.9 3.76 1.76 0.9490 (high!)\n", | |
" petal width: 0.1 2.5 1.20 0.76 0.9565 (high!)\n", | |
" ============== ==== ==== ======= ===== ====================\n", | |
" :Missing Attribute Values: None\n", | |
" :Class Distribution: 33.3% for each of 3 classes.\n", | |
" :Creator: R.A. Fisher\n", | |
" :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", | |
" :Date: July, 1988\n", | |
"\n", | |
"This is a copy of UCI ML iris datasets.\n", | |
"http://archive.ics.uci.edu/ml/datasets/Iris\n", | |
"\n", | |
"The famous Iris database, first used by Sir R.A Fisher\n", | |
"\n", | |
"This is perhaps the best known database to be found in the\n", | |
"pattern recognition literature. Fisher's paper is a classic in the field and\n", | |
"is referenced frequently to this day. (See Duda & Hart, for example.) The\n", | |
"data set contains 3 classes of 50 instances each, where each class refers to a\n", | |
"type of iris plant. One class is linearly separable from the other 2; the\n", | |
"latter are NOT linearly separable from each other.\n", | |
"\n", | |
"References\n", | |
"----------\n", | |
" - Fisher,R.A. \"The use of multiple measurements in taxonomic problems\"\n", | |
" Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n", | |
" Mathematical Statistics\" (John Wiley, NY, 1950).\n", | |
" - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.\n", | |
" (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.\n", | |
" - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n", | |
" Structure and Classification Rule for Recognition in Partially Exposed\n", | |
" Environments\". IEEE Transactions on Pattern Analysis and Machine\n", | |
" Intelligence, Vol. PAMI-2, No. 1, 67-71.\n", | |
" - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\". IEEE Transactions\n", | |
" on Information Theory, May 1972, 431-433.\n", | |
" - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al\"s AUTOCLASS II\n", | |
" conceptual clustering system finds 3 classes in the data.\n", | |
" - Many, many more ...\n", | |
"\n", | |
"\n", | |
"feature_names\n", | |
"['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"iris = datasets.load_iris()\n", | |
"# the dataset is loaded as a dictionary of key-value pairs\n", | |
"# print each attribute name followed by its value:\n", | |
"#\n", | |
"# iterate over items() rather than indexing keys()/values(): on Python 3\n", | |
"# those are views that cannot be subscripted, so iris.keys()[i] raises TypeError\n", | |
"for key, value in iris.items():\n", | |
"    print(key)\n", | |
"    print(value)\n", | |
"    print('')  # blank separator line; a bare `print` does nothing on Python 3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;\n", | |
"# use its replacement when available and fall back only on older installations\n", | |
"try:\n", | |
"    from sklearn.model_selection import train_test_split\n", | |
"except ImportError:\n", | |
"    from sklearn.cross_validation import train_test_split\n", | |
"\n", | |
"# hold out 30% of the samples as a test set; the fixed random_state keeps the split reproducible\n", | |
"X, y = iris.data, iris.target\n", | |
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Help on package sklearn.neighbors in sklearn:\n", | |
"\n", | |
"NAME\n", | |
" sklearn.neighbors\n", | |
"\n", | |
"FILE\n", | |
" /home/anirudh/anaconda/lib/python2.7/site-packages/sklearn/neighbors/__init__.py\n", | |
"\n", | |
"DESCRIPTION\n", | |
" The :mod:`sklearn.neighbors` module implements the k-nearest neighbors\n", | |
" algorithm.\n", | |
"\n", | |
"PACKAGE CONTENTS\n", | |
" approximate\n", | |
" ball_tree\n", | |
" base\n", | |
" classification\n", | |
" dist_metrics\n", | |
" graph\n", | |
" kd_tree\n", | |
" kde\n", | |
" nearest_centroid\n", | |
" regression\n", | |
" setup\n", | |
" typedefs\n", | |
" unsupervised\n", | |
"\n", | |
"CLASSES\n", | |
" __builtin__.object\n", | |
" sklearn.neighbors.dist_metrics.DistanceMetric\n", | |
" sklearn.base.BaseEstimator(__builtin__.object)\n", | |
" sklearn.neighbors.approximate.LSHForest(sklearn.base.BaseEstimator, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin)\n", | |
" sklearn.neighbors.kde.KernelDensity\n", | |
" sklearn.neighbors.nearest_centroid.NearestCentroid(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin)\n", | |
" sklearn.base.ClassifierMixin(__builtin__.object)\n", | |
" sklearn.neighbors.classification.KNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.classification.RadiusNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.nearest_centroid.NearestCentroid(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin)\n", | |
" sklearn.base.RegressorMixin(__builtin__.object)\n", | |
" sklearn.neighbors.regression.KNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.regression.RadiusNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.ball_tree.BinaryTree(__builtin__.object)\n", | |
" sklearn.neighbors.ball_tree.BallTree\n", | |
" sklearn.neighbors.base.KNeighborsMixin(__builtin__.object)\n", | |
" sklearn.neighbors.approximate.LSHForest(sklearn.base.BaseEstimator, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin)\n", | |
" sklearn.neighbors.classification.KNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.regression.KNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.unsupervised.NearestNeighbors(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.UnsupervisedMixin)\n", | |
" sklearn.neighbors.base.NeighborsBase(abc.NewBase)\n", | |
" sklearn.neighbors.classification.KNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.classification.RadiusNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.regression.KNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.regression.RadiusNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.unsupervised.NearestNeighbors(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.UnsupervisedMixin)\n", | |
" sklearn.neighbors.base.RadiusNeighborsMixin(__builtin__.object)\n", | |
" sklearn.neighbors.approximate.LSHForest(sklearn.base.BaseEstimator, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin)\n", | |
" sklearn.neighbors.classification.RadiusNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.regression.RadiusNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.unsupervised.NearestNeighbors(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.UnsupervisedMixin)\n", | |
" sklearn.neighbors.base.SupervisedFloatMixin(__builtin__.object)\n", | |
" sklearn.neighbors.regression.KNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.regression.RadiusNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" sklearn.neighbors.base.SupervisedIntegerMixin(__builtin__.object)\n", | |
" sklearn.neighbors.classification.KNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.classification.RadiusNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" sklearn.neighbors.base.UnsupervisedMixin(__builtin__.object)\n", | |
" sklearn.neighbors.unsupervised.NearestNeighbors(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.UnsupervisedMixin)\n", | |
" sklearn.neighbors.kd_tree.BinaryTree(__builtin__.object)\n", | |
" sklearn.neighbors.kd_tree.KDTree\n", | |
" \n", | |
" class BallTree(BinaryTree)\n", | |
" | BallTree for fast generalized N-point problems\n", | |
" | \n", | |
" | BallTree(X, leaf_size=40, metric='minkowski', \\**kwargs)\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features]\n", | |
" | n_samples is the number of points in the data set, and\n", | |
" | n_features is the dimension of the parameter space.\n", | |
" | Note: if X is a C-contiguous array of doubles then data will\n", | |
" | not be copied. Otherwise, an internal copy will be made.\n", | |
" | \n", | |
" | leaf_size : positive integer (default = 20)\n", | |
" | Number of points at which to switch to brute-force. Changing\n", | |
" | leaf_size will not affect the results of a query, but can\n", | |
" | significantly impact the speed of a query and the memory required\n", | |
" | to store the constructed tree. The amount of memory needed to\n", | |
" | store the tree scales as approximately n_samples / leaf_size.\n", | |
" | For a specified ``leaf_size``, a leaf node is guaranteed to\n", | |
" | satisfy ``leaf_size <= n_points <= 2 * leaf_size``, except in\n", | |
" | the case that ``n_samples < leaf_size``.\n", | |
" | \n", | |
" | metric : string or DistanceMetric object\n", | |
" | the distance metric to use for the tree. Default='minkowski'\n", | |
" | with p=2 (that is, a euclidean metric). See the documentation\n", | |
" | of the DistanceMetric class for a list of available metrics.\n", | |
" | ball_tree.valid_metrics gives a list of the metrics which\n", | |
" | are valid for BallTree.\n", | |
" | \n", | |
" | Additional keywords are passed to the distance metric class.\n", | |
" | \n", | |
" | Attributes\n", | |
" | ----------\n", | |
" | data : np.ndarray\n", | |
" | The training data\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Query for k-nearest neighbors\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | \n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BallTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> dist, ind = tree.query(X[0], k=3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of 3 closest neighbors\n", | |
" | [0 3 1]\n", | |
" | >>> print dist # distances to 3 closest neighbors\n", | |
" | [ 0. 0.19662693 0.29473397]\n", | |
" | \n", | |
" | Pickle and Unpickle a tree. Note that the state of the tree is saved in the\n", | |
" | pickle operation: the tree needs not be rebuilt upon unpickling.\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> import pickle\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BallTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> s = pickle.dumps(tree) # doctest: +SKIP\n", | |
" | >>> tree_copy = pickle.loads(s) # doctest: +SKIP\n", | |
" | >>> dist, ind = tree_copy.query(X[0], k=3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of 3 closest neighbors\n", | |
" | [0 3 1]\n", | |
" | >>> print dist # distances to 3 closest neighbors\n", | |
" | [ 0. 0.19662693 0.29473397]\n", | |
" | \n", | |
" | Query for neighbors within a given radius\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BallTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> print tree.query_radius(X[0], r=0.3, count_only=True)\n", | |
" | 3\n", | |
" | >>> ind = tree.query_radius(X[0], r=0.3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of neighbors within distance 0.3\n", | |
" | [3 0 1]\n", | |
" | \n", | |
" | \n", | |
" | Compute a gaussian kernel density estimate:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(1)\n", | |
" | >>> X = np.random.random((100, 3))\n", | |
" | >>> tree = BallTree(X) # doctest: +SKIP\n", | |
" | >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')\n", | |
" | array([ 6.94114649, 7.83281226, 7.2071716 ])\n", | |
" | \n", | |
" | Compute a two-point auto-correlation function\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((30, 3))\n", | |
" | >>> r = np.linspace(0, 1, 5)\n", | |
" | >>> tree = BallTree(X) # doctest: +SKIP\n", | |
" | >>> tree.two_point_correlation(X, r)\n", | |
" | array([ 30, 62, 278, 580, 820])\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | BallTree\n", | |
" | BinaryTree\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __new__ = <built-in method __new__ of type object>\n", | |
" | T.__new__(S, ...) -> a new object with type S, a subtype of T\n", | |
" | \n", | |
" | __pyx_vtable__ = <capsule object NULL>\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from BinaryTree:\n", | |
" | \n", | |
" | __getstate__(...)\n", | |
" | get state for pickling\n", | |
" | \n", | |
" | __init__(...)\n", | |
" | x.__init__(...) initializes x; see help(type(x)) for signature\n", | |
" | \n", | |
" | __reduce__(...)\n", | |
" | reduce method used for pickling\n", | |
" | \n", | |
" | __setstate__(...)\n", | |
" | set state for pickling\n", | |
" | \n", | |
" | get_arrays(...)\n", | |
" | \n", | |
" | get_n_calls(...)\n", | |
" | \n", | |
" | get_tree_stats(...)\n", | |
" | \n", | |
" | kernel_density(...)\n", | |
" | kernel_density(self, X, h, kernel='gaussian', atol=0, rtol=1E-8,\n", | |
" | breadth_first=True, return_log=False)\n", | |
" | \n", | |
" | Compute the kernel density estimate at points X with the given kernel,\n", | |
" | using the distance metric specified at tree creation.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like\n", | |
" | An array of points to query. Last dimension should match dimension\n", | |
" | of training data.\n", | |
" | h : float\n", | |
" | the bandwidth of the kernel\n", | |
" | kernel : string\n", | |
" | specify the kernel to use. Options are\n", | |
" | - 'gaussian'\n", | |
" | - 'tophat'\n", | |
" | - 'epanechnikov'\n", | |
" | - 'exponential'\n", | |
" | - 'linear'\n", | |
" | - 'cosine'\n", | |
" | Default is kernel = 'gaussian'\n", | |
" | atol, rtol : float (default = 0)\n", | |
" | Specify the desired relative and absolute tolerance of the result.\n", | |
" | If the true result is K_true, then the returned result K_ret\n", | |
" | satisfies ``abs(K_true - K_ret) < atol + rtol * K_ret``\n", | |
" | The default is zero (i.e. machine precision) for both.\n", | |
" | breadth_first : boolean (default = False)\n", | |
" | if True, use a breadth-first search. If False (default) use a\n", | |
" | depth-first search. Breadth-first is generally faster for\n", | |
" | compact kernels and/or high tolerances.\n", | |
" | return_log : boolean (default = False)\n", | |
" | return the logarithm of the result. This can be more accurate\n", | |
" | than returning the result itself for narrow kernels.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | density : ndarray\n", | |
" | The array of (log)-density evaluations, shape = X.shape[:-1]\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Compute a gaussian kernel density estimate:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(1)\n", | |
" | >>> X = np.random.random((100, 3))\n", | |
" | >>> tree = BinaryTree(X) # doctest: +SKIP\n", | |
" | >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')\n", | |
" | array([ 6.94114649, 7.83281226, 7.2071716 ])\n", | |
" | \n", | |
" | query(...)\n", | |
" | query(X, k=1, return_distance=True,\n", | |
" | dualtree=False, breadth_first=False)\n", | |
" | \n", | |
" | query the tree for the k nearest neighbors\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension self.dim\n", | |
" | An array of points to query\n", | |
" | k : integer (default = 1)\n", | |
" | The number of nearest neighbors to return\n", | |
" | return_distance : boolean (default = True)\n", | |
" | if True, return a tuple (d, i) of distances and indices\n", | |
" | if False, return array i\n", | |
" | dualtree : boolean (default = False)\n", | |
" | if True, use the dual tree formalism for the query: a tree is\n", | |
" | built for the query points, and the pair of trees is used to\n", | |
" | efficiently search this space. This can lead to better\n", | |
" | performance as the number of points grows large.\n", | |
" | breadth_first : boolean (default = False)\n", | |
" | if True, then query the nodes in a breadth-first manner.\n", | |
" | Otherwise, query the nodes in a depth-first manner.\n", | |
" | sort_results : boolean (default = True)\n", | |
" | if True, then distances and indices of each point are sorted\n", | |
" | on return, so that the first column contains the closest points.\n", | |
" | Otherwise, neighbors are returned in an arbitrary order.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | i : if return_distance == False\n", | |
" | (d,i) : if return_distance == True\n", | |
" | \n", | |
" | d : array of doubles - shape: x.shape[:-1] + (k,)\n", | |
" | each entry gives the list of distances to the\n", | |
" | neighbors of the corresponding point\n", | |
" | \n", | |
" | i : array of integers - shape: x.shape[:-1] + (k,)\n", | |
" | each entry gives the list of indices of\n", | |
" | neighbors of the corresponding point\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Query for k-nearest neighbors\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BinaryTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> dist, ind = tree.query(X[0], k=3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of 3 closest neighbors\n", | |
" | [0 3 1]\n", | |
" | >>> print dist # distances to 3 closest neighbors\n", | |
" | [ 0. 0.19662693 0.29473397]\n", | |
" | \n", | |
" | query_radius(...)\n", | |
" | query_radius(self, X, r, count_only = False):\n", | |
" | \n", | |
" | query the tree for neighbors within a radius r\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension self.dim\n", | |
" | An array of points to query\n", | |
" | r : distance within which neighbors are returned\n", | |
" | r can be a single value, or an array of values of shape\n", | |
" | x.shape[:-1] if different radii are desired for each point.\n", | |
" | return_distance : boolean (default = False)\n", | |
" | if True, return distances to neighbors of each point\n", | |
" | if False, return only neighbors\n", | |
" | Note that unlike the query() method, setting return_distance=True\n", | |
" | here adds to the computation time. Not all distances need to be\n", | |
" | calculated explicitly for return_distance=False. Results are\n", | |
" | not sorted by default: see ``sort_results`` keyword.\n", | |
" | count_only : boolean (default = False)\n", | |
" | if True, return only the count of points within distance r\n", | |
" | if False, return the indices of all points within distance r\n", | |
" | If return_distance==True, setting count_only=True will\n", | |
" | result in an error.\n", | |
" | sort_results : boolean (default = False)\n", | |
" | if True, the distances and indices will be sorted before being\n", | |
" | returned. If False, the results will not be sorted. If\n", | |
" | return_distance == False, setting sort_results = True will\n", | |
" | result in an error.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | count : if count_only == True\n", | |
" | ind : if count_only == False and return_distance == False\n", | |
" | (ind, dist) : if count_only == False and return_distance == True\n", | |
" | \n", | |
" | count : array of integers, shape = X.shape[:-1]\n", | |
" | each entry gives the number of neighbors within\n", | |
" | a distance r of the corresponding point.\n", | |
" | \n", | |
" | ind : array of objects, shape = X.shape[:-1]\n", | |
" | each element is a numpy integer array listing the indices of\n", | |
" | neighbors of the corresponding point. Note that unlike\n", | |
" | the results of a k-neighbors query, the returned neighbors\n", | |
" | are not sorted by distance by default.\n", | |
" | \n", | |
" | dist : array of objects, shape = X.shape[:-1]\n", | |
" | each element is a numpy double array\n", | |
" | listing the distances corresponding to indices in i.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Query for neighbors in a given radius\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BinaryTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> print tree.query_radius(X[0], r=0.3, count_only=True)\n", | |
" | 3\n", | |
" | >>> ind = tree.query_radius(X[0], r=0.3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of neighbors within distance 0.3\n", | |
" | [3 0 1]\n", | |
" | \n", | |
" | reset_n_calls(...)\n", | |
" | \n", | |
" | two_point_correlation(...)\n", | |
" | Compute the two-point correlation function\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like\n", | |
" | An array of points to query. Last dimension should match dimension\n", | |
" | of training data.\n", | |
" | r : array_like\n", | |
" | A one-dimensional array of distances\n", | |
" | dualtree : boolean (default = False)\n", | |
" | If true, use a dualtree algorithm. Otherwise, use a single-tree\n", | |
" | algorithm. Dual tree algorithms can have better scaling for\n", | |
" | large N.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | counts : ndarray\n", | |
" | counts[i] contains the number of pairs of points with distance\n", | |
" | less than or equal to r[i]\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Compute the two-point autocorrelation function of X:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((30, 3))\n", | |
" | >>> r = np.linspace(0, 1, 5)\n", | |
" | >>> tree = BinaryTree(X) # doctest: +SKIP\n", | |
" | >>> tree.two_point_correlation(X, r)\n", | |
" | array([ 30, 62, 278, 580, 820])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from BinaryTree:\n", | |
" | \n", | |
" | data\n", | |
" | \n", | |
" | idx_array\n", | |
" | \n", | |
" | node_bounds\n", | |
" | \n", | |
" | node_data\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes inherited from BinaryTree:\n", | |
" | \n", | |
" | valid_metrics = ['chebyshev', 'sokalmichener', 'canberra', 'haversine'...\n", | |
" \n", | |
" class DistanceMetric(__builtin__.object)\n", | |
" | DistanceMetric class\n", | |
" | \n", | |
" | This class provides a uniform interface to fast distance metric\n", | |
" | functions. The various metrics can be accessed via the `get_metric`\n", | |
" | class method and the metric string identifier (see below).\n", | |
" | For example, to use the Euclidean distance:\n", | |
" | \n", | |
" | >>> dist = DistanceMetric.get_metric('euclidean')\n", | |
" | >>> X = [[0, 1, 2],\n", | |
" | [3, 4, 5]]\n", | |
" | >>> dist.pairwise(X)\n", | |
" | array([[ 0. , 5.19615242],\n", | |
" | [ 5.19615242, 0. ]])\n", | |
" | \n", | |
" | Available Metrics\n", | |
" | The following lists the string metric identifiers and the associated\n", | |
" | distance metric classes:\n", | |
" | \n", | |
" | **Metrics intended for real-valued vector spaces:**\n", | |
" | \n", | |
" | ============== ==================== ======== ===============================\n", | |
" | identifier class name args distance function\n", | |
" | -------------- -------------------- -------- -------------------------------\n", | |
" | \"euclidean\" EuclideanDistance - ``sqrt(sum((x - y)^2))``\n", | |
" | \"manhattan\" ManhattanDistance - ``sum(|x - y|)``\n", | |
" | \"chebyshev\" ChebyshevDistance - ``max(|x - y|)``\n", | |
" | \"minkowski\" MinkowskiDistance p ``sum(|x - y|^p)^(1/p)``\n", | |
" | \"wminkowski\" WMinkowskiDistance p, w ``sum(w * |x - y|^p)^(1/p)``\n", | |
" | \"seuclidean\" SEuclideanDistance V ``sqrt(sum((x - y)^2 / V))``\n", | |
" | \"mahalanobis\" MahalanobisDistance V or VI ``sqrt((x - y)' V^-1 (x - y))``\n", | |
" | ============== ==================== ======== ===============================\n", | |
" | \n", | |
" | **Metrics intended for two-dimensional vector spaces:**\n", | |
" | \n", | |
" | ============ ================== ========================================\n", | |
" | identifier class name distance function\n", | |
" | ------------ ------------------ ----------------------------------------\n", | |
" | \"haversine\" HaversineDistance 2 arcsin(sqrt(sin^2(0.5*dx)\n", | |
" | + cos(x1)cos(x2)sin^2(0.5*dy)))\n", | |
" | ============ ================== ========================================\n", | |
" | \n", | |
" | \n", | |
" | **Metrics intended for integer-valued vector spaces:** Though intended\n", | |
" | for integer-valued vectors, these are also valid metrics in the case of\n", | |
" | real-valued vectors.\n", | |
" | \n", | |
" | ============= ==================== ========================================\n", | |
" | identifier class name distance function\n", | |
" | ------------- -------------------- ----------------------------------------\n", | |
" | \"hamming\" HammingDistance ``N_unequal(x, y) / N_tot``\n", | |
" | \"canberra\" CanberraDistance ``sum(|x - y| / (|x| + |y|))``\n", | |
" | \"braycurtis\" BrayCurtisDistance ``sum(|x - y|) / (sum(|x|) + sum(|y|))``\n", | |
" | ============= ==================== ========================================\n", | |
" | \n", | |
" | **Metrics intended for boolean-valued vector spaces:** Any nonzero entry\n", | |
" | is evaluated to \"True\". In the listings below, the following\n", | |
" | abbreviations are used:\n", | |
" | \n", | |
" | - N : number of dimensions\n", | |
" | - NTT : number of dims in which both values are True\n", | |
" | - NTF : number of dims in which the first value is True, second is False\n", | |
" | - NFT : number of dims in which the first value is False, second is True\n", | |
" | - NFF : number of dims in which both values are False\n", | |
" | - NNEQ : number of non-equal dimensions, NNEQ = NTF + NFT\n", | |
" | - NNZ : number of nonzero dimensions, NNZ = NTF + NFT + NTT\n", | |
" | \n", | |
" | ================= ======================= ===============================\n", | |
" | identifier class name distance function\n", | |
" | ----------------- ----------------------- -------------------------------\n", | |
" | \"jaccard\" JaccardDistance NNEQ / NNZ\n", | |
" | \"matching\" MatchingDistance NNEQ / N\n", | |
" | \"dice\" DiceDistance NNEQ / (NTT + NNZ)\n", | |
" | \"kulsinski\" KulsinskiDistance (NNEQ + N - NTT) / (NNEQ + N)\n", | |
" | \"rogerstanimoto\" RogersTanimotoDistance 2 * NNEQ / (N + NNEQ)\n", | |
" | \"russellrao\" RussellRaoDistance NNZ / N\n", | |
" | \"sokalmichener\" SokalMichenerDistance 2 * NNEQ / (N + NNEQ)\n", | |
" | \"sokalsneath\" SokalSneathDistance NNEQ / (NNEQ + 0.5 * NTT)\n", | |
" | ================= ======================= ===============================\n", | |
" | \n", | |
" | **User-defined distance:**\n", | |
" | \n", | |
" | =========== =============== =======\n", | |
" | identifier class name args\n", | |
" | ----------- --------------- -------\n", | |
" | \"pyfunc\" PyFuncDistance func\n", | |
" | =========== =============== =======\n", | |
" | \n", | |
" | Here ``func`` is a function which takes two one-dimensional numpy\n", | |
" | arrays, and returns a distance. Note that in order to be used within\n", | |
" | the BallTree, the distance must be a true metric:\n", | |
" | i.e. it must satisfy the following properties\n", | |
" | \n", | |
" | 1) Non-negativity: d(x, y) >= 0\n", | |
" | 2) Identity: d(x, y) = 0 if and only if x == y\n", | |
" | 3) Symmetry: d(x, y) = d(y, x)\n", | |
" | 4) Triangle Inequality: d(x, y) + d(y, z) >= d(x, z)\n", | |
" | \n", | |
" | Because of the Python object overhead involved in calling the python\n", | |
" | function, this will be fairly slow, but it will have the same\n", | |
" | scaling as other distances.\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __getstate__(...)\n", | |
" | get state for pickling\n", | |
" | \n", | |
" | __init__(...)\n", | |
" | x.__init__(...) initializes x; see help(type(x)) for signature\n", | |
" | \n", | |
" | __reduce__(...)\n", | |
" | reduce method used for pickling\n", | |
" | \n", | |
" | __setstate__(...)\n", | |
" | set state for pickling\n", | |
" | \n", | |
" | dist_to_rdist(...)\n", | |
" | Convert the true distance to the reduced distance.\n", | |
" | \n", | |
" | The reduced distance, defined for some metrics, is a computationally\n", | |
" | more efficient measure which preserves the rank of the true distance.\n", | |
" | For example, in the Euclidean distance metric, the reduced distance\n", | |
" | is the squared-euclidean distance.\n", | |
" | \n", | |
" | get_metric(...)\n", | |
" | Get the given distance metric from the string identifier.\n", | |
" | \n", | |
" | See the docstring of DistanceMetric for a list of available metrics.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | metric : string or class name\n", | |
" | The distance metric to use\n", | |
" | **kwargs\n", | |
" | additional arguments will be passed to the requested metric\n", | |
" | \n", | |
" | pairwise(...)\n", | |
" | Compute the pairwise distances between X and Y\n", | |
" | \n", | |
" | This is a convenience routine for the sake of testing. For many\n", | |
" | metrics, the utilities in scipy.spatial.distance.cdist and\n", | |
" | scipy.spatial.distance.pdist will be faster.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like\n", | |
" | Array of shape (Nx, D), representing Nx points in D dimensions.\n", | |
" | Y : array_like (optional)\n", | |
" | Array of shape (Ny, D), representing Ny points in D dimensions.\n", | |
" | If not specified, then Y=X.\n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : ndarray\n", | |
" | The shape (Nx, Ny) array of pairwise distances between points in\n", | |
" | X and Y.\n", | |
" | \n", | |
" | rdist_to_dist(...)\n", | |
" | Convert the Reduced distance to the true distance.\n", | |
" | \n", | |
" | The reduced distance, defined for some metrics, is a computationally\n", | |
" | more efficient measure which preserves the rank of the true distance.\n", | |
" | For example, in the Euclidean distance metric, the reduced distance\n", | |
" | is the squared-euclidean distance.\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __new__ = <built-in method __new__ of type object>\n", | |
" | T.__new__(S, ...) -> a new object with type S, a subtype of T\n", | |
" | \n", | |
" | __pyx_vtable__ = <capsule object NULL>\n", | |
" \n", | |
" class KDTree(BinaryTree)\n", | |
" | KDTree for fast generalized N-point problems\n", | |
" | \n", | |
" | KDTree(X, leaf_size=40, metric='minkowski', \\**kwargs)\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features]\n", | |
" | n_samples is the number of points in the data set, and\n", | |
" | n_features is the dimension of the parameter space.\n", | |
" | Note: if X is a C-contiguous array of doubles then data will\n", | |
" | not be copied. Otherwise, an internal copy will be made.\n", | |
" | \n", | |
" | leaf_size : positive integer (default = 40)\n", | |
" | Number of points at which to switch to brute-force. Changing\n", | |
" | leaf_size will not affect the results of a query, but can\n", | |
" | significantly impact the speed of a query and the memory required\n", | |
" | to store the constructed tree. The amount of memory needed to\n", | |
" | store the tree scales as approximately n_samples / leaf_size.\n", | |
" | For a specified ``leaf_size``, a leaf node is guaranteed to\n", | |
" | satisfy ``leaf_size <= n_points <= 2 * leaf_size``, except in\n", | |
" | the case that ``n_samples < leaf_size``.\n", | |
" | \n", | |
" | metric : string or DistanceMetric object\n", | |
" | the distance metric to use for the tree. Default='minkowski'\n", | |
" | with p=2 (that is, a euclidean metric). See the documentation\n", | |
" | of the DistanceMetric class for a list of available metrics.\n", | |
" | kd_tree.valid_metrics gives a list of the metrics which\n", | |
" | are valid for KDTree.\n", | |
" | \n", | |
" | Additional keywords are passed to the distance metric class.\n", | |
" | \n", | |
" | Attributes\n", | |
" | ----------\n", | |
" | data : np.ndarray\n", | |
" | The training data\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Query for k-nearest neighbors\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | \n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = KDTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> dist, ind = tree.query(X[0], k=3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of 3 closest neighbors\n", | |
" | [0 3 1]\n", | |
" | >>> print dist # distances to 3 closest neighbors\n", | |
" | [ 0. 0.19662693 0.29473397]\n", | |
" | \n", | |
" | Pickle and Unpickle a tree. Note that the state of the tree is saved in the\n", | |
" | pickle operation: the tree need not be rebuilt upon unpickling.\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> import pickle\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = KDTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> s = pickle.dumps(tree) # doctest: +SKIP\n", | |
" | >>> tree_copy = pickle.loads(s) # doctest: +SKIP\n", | |
" | >>> dist, ind = tree_copy.query(X[0], k=3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of 3 closest neighbors\n", | |
" | [0 3 1]\n", | |
" | >>> print dist # distances to 3 closest neighbors\n", | |
" | [ 0. 0.19662693 0.29473397]\n", | |
" | \n", | |
" | Query for neighbors within a given radius\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = KDTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> print tree.query_radius(X[0], r=0.3, count_only=True)\n", | |
" | 3\n", | |
" | >>> ind = tree.query_radius(X[0], r=0.3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of neighbors within distance 0.3\n", | |
" | [3 0 1]\n", | |
" | \n", | |
" | \n", | |
" | Compute a gaussian kernel density estimate:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(1)\n", | |
" | >>> X = np.random.random((100, 3))\n", | |
" | >>> tree = KDTree(X) # doctest: +SKIP\n", | |
" | >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')\n", | |
" | array([ 6.94114649, 7.83281226, 7.2071716 ])\n", | |
" | \n", | |
" | Compute a two-point auto-correlation function\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((30, 3))\n", | |
" | >>> r = np.linspace(0, 1, 5)\n", | |
" | >>> tree = KDTree(X) # doctest: +SKIP\n", | |
" | >>> tree.two_point_correlation(X, r)\n", | |
" | array([ 30, 62, 278, 580, 820])\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | KDTree\n", | |
" | BinaryTree\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __new__ = <built-in method __new__ of type object>\n", | |
" | T.__new__(S, ...) -> a new object with type S, a subtype of T\n", | |
" | \n", | |
" | __pyx_vtable__ = <capsule object NULL>\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from BinaryTree:\n", | |
" | \n", | |
" | __getstate__(...)\n", | |
" | get state for pickling\n", | |
" | \n", | |
" | __init__(...)\n", | |
" | x.__init__(...) initializes x; see help(type(x)) for signature\n", | |
" | \n", | |
" | __reduce__(...)\n", | |
" | reduce method used for pickling\n", | |
" | \n", | |
" | __setstate__(...)\n", | |
" | set state for pickling\n", | |
" | \n", | |
" | get_arrays(...)\n", | |
" | \n", | |
" | get_n_calls(...)\n", | |
" | \n", | |
" | get_tree_stats(...)\n", | |
" | \n", | |
" | kernel_density(...)\n", | |
" | kernel_density(self, X, h, kernel='gaussian', atol=0, rtol=1E-8,\n", | |
" | breadth_first=True, return_log=False)\n", | |
" | \n", | |
" | Compute the kernel density estimate at points X with the given kernel,\n", | |
" | using the distance metric specified at tree creation.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like\n", | |
" | An array of points to query. Last dimension should match dimension\n", | |
" | of training data.\n", | |
" | h : float\n", | |
" | the bandwidth of the kernel\n", | |
" | kernel : string\n", | |
" | specify the kernel to use. Options are\n", | |
" | - 'gaussian'\n", | |
" | - 'tophat'\n", | |
" | - 'epanechnikov'\n", | |
" | - 'exponential'\n", | |
" | - 'linear'\n", | |
" | - 'cosine'\n", | |
" | Default is kernel = 'gaussian'\n", | |
" | atol, rtol : float (default = 0 and 1E-8, respectively)\n", | |
" | Specify the desired absolute and relative tolerance of the result.\n", | |
" | If the true result is K_true, then the returned result K_ret\n", | |
" | satisfies ``abs(K_true - K_ret) < atol + rtol * K_ret``\n", | |
" | The defaults are atol = 0 and rtol = 1E-8.\n", | |
" | breadth_first : boolean (default = True)\n", | |
" | if True (default), use a breadth-first search. If False, use a\n", | |
" | depth-first search. Breadth-first is generally faster for\n", | |
" | compact kernels and/or high tolerances.\n", | |
" | return_log : boolean (default = False)\n", | |
" | return the logarithm of the result. This can be more accurate\n", | |
" | than returning the result itself for narrow kernels.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | density : ndarray\n", | |
" | The array of (log)-density evaluations, shape = X.shape[:-1]\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Compute a gaussian kernel density estimate:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(1)\n", | |
" | >>> X = np.random.random((100, 3))\n", | |
" | >>> tree = BinaryTree(X) # doctest: +SKIP\n", | |
" | >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')\n", | |
" | array([ 6.94114649, 7.83281226, 7.2071716 ])\n", | |
" | \n", | |
" | query(...)\n", | |
" | query(X, k=1, return_distance=True,\n", | |
" | dualtree=False, breadth_first=False)\n", | |
" | \n", | |
" | query the tree for the k nearest neighbors\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension self.dim\n", | |
" | An array of points to query\n", | |
" | k : integer (default = 1)\n", | |
" | The number of nearest neighbors to return\n", | |
" | return_distance : boolean (default = True)\n", | |
" | if True, return a tuple (d, i) of distances and indices\n", | |
" | if False, return array i\n", | |
" | dualtree : boolean (default = False)\n", | |
" | if True, use the dual tree formalism for the query: a tree is\n", | |
" | built for the query points, and the pair of trees is used to\n", | |
" | efficiently search this space. This can lead to better\n", | |
" | performance as the number of points grows large.\n", | |
" | breadth_first : boolean (default = False)\n", | |
" | if True, then query the nodes in a breadth-first manner.\n", | |
" | Otherwise, query the nodes in a depth-first manner.\n", | |
" | sort_results : boolean (default = True)\n", | |
" | if True, then distances and indices of each point are sorted\n", | |
" | on return, so that the first column contains the closest points.\n", | |
" | Otherwise, neighbors are returned in an arbitrary order.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | i : if return_distance == False\n", | |
" | (d,i) : if return_distance == True\n", | |
" | \n", | |
" | d : array of doubles - shape: x.shape[:-1] + (k,)\n", | |
" | each entry gives the list of distances to the\n", | |
" | neighbors of the corresponding point\n", | |
" | \n", | |
" | i : array of integers - shape: x.shape[:-1] + (k,)\n", | |
" | each entry gives the list of indices of\n", | |
" | neighbors of the corresponding point\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Query for k-nearest neighbors\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BinaryTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> dist, ind = tree.query(X[0], k=3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of 3 closest neighbors\n", | |
" | [0 3 1]\n", | |
" | >>> print dist # distances to 3 closest neighbors\n", | |
" | [ 0. 0.19662693 0.29473397]\n", | |
" | \n", | |
" | query_radius(...)\n", | |
" | query_radius(self, X, r, count_only = False):\n", | |
" | \n", | |
" | query the tree for neighbors within a radius r\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension self.dim\n", | |
" | An array of points to query\n", | |
" | r : distance within which neighbors are returned\n", | |
" | r can be a single value, or an array of values of shape\n", | |
" | x.shape[:-1] if different radii are desired for each point.\n", | |
" | return_distance : boolean (default = False)\n", | |
" | if True, return distances to neighbors of each point\n", | |
" | if False, return only neighbors\n", | |
" | Note that unlike the query() method, setting return_distance=True\n", | |
" | here adds to the computation time. Not all distances need to be\n", | |
" | calculated explicitly for return_distance=False. Results are\n", | |
" | not sorted by default: see ``sort_results`` keyword.\n", | |
" | count_only : boolean (default = False)\n", | |
" | if True, return only the count of points within distance r\n", | |
" | if False, return the indices of all points within distance r\n", | |
" | If return_distance==True, setting count_only=True will\n", | |
" | result in an error.\n", | |
" | sort_results : boolean (default = False)\n", | |
" | if True, the distances and indices will be sorted before being\n", | |
" | returned. If False, the results will not be sorted. If\n", | |
" | return_distance == False, setting sort_results = True will\n", | |
" | result in an error.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | count : if count_only == True\n", | |
" | ind : if count_only == False and return_distance == False\n", | |
" | (ind, dist) : if count_only == False and return_distance == True\n", | |
" | \n", | |
" | count : array of integers, shape = X.shape[:-1]\n", | |
" | each entry gives the number of neighbors within\n", | |
" | a distance r of the corresponding point.\n", | |
" | \n", | |
" | ind : array of objects, shape = X.shape[:-1]\n", | |
" | each element is a numpy integer array listing the indices of\n", | |
" | neighbors of the corresponding point. Note that unlike\n", | |
" | the results of a k-neighbors query, the returned neighbors\n", | |
" | are not sorted by distance by default.\n", | |
" | \n", | |
" | dist : array of objects, shape = X.shape[:-1]\n", | |
" | each element is a numpy double array\n", | |
" | listing the distances corresponding to indices in ind.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Query for neighbors in a given radius\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", | |
" | >>> tree = BinaryTree(X, leaf_size=2) # doctest: +SKIP\n", | |
" | >>> print tree.query_radius(X[0], r=0.3, count_only=True)\n", | |
" | 3\n", | |
" | >>> ind = tree.query_radius(X[0], r=0.3) # doctest: +SKIP\n", | |
" | >>> print ind # indices of neighbors within distance 0.3\n", | |
" | [3 0 1]\n", | |
" | \n", | |
" | reset_n_calls(...)\n", | |
" | \n", | |
" | two_point_correlation(...)\n", | |
" | Compute the two-point correlation function\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like\n", | |
" | An array of points to query. Last dimension should match dimension\n", | |
" | of training data.\n", | |
" | r : array_like\n", | |
" | A one-dimensional array of distances\n", | |
" | dualtree : boolean (default = False)\n", | |
" | If true, use a dualtree algorithm. Otherwise, use a single-tree\n", | |
" | algorithm. Dual tree algorithms can have better scaling for\n", | |
" | large N.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | counts : ndarray\n", | |
" | counts[i] contains the number of pairs of points with distance\n", | |
" | less than or equal to r[i]\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | Compute the two-point autocorrelation function of X:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> np.random.seed(0)\n", | |
" | >>> X = np.random.random((30, 3))\n", | |
" | >>> r = np.linspace(0, 1, 5)\n", | |
" | >>> tree = BinaryTree(X) # doctest: +SKIP\n", | |
" | >>> tree.two_point_correlation(X, r)\n", | |
" | array([ 30, 62, 278, 580, 820])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from BinaryTree:\n", | |
" | \n", | |
" | data\n", | |
" | \n", | |
" | idx_array\n", | |
" | \n", | |
" | node_bounds\n", | |
" | \n", | |
" | node_data\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes inherited from BinaryTree:\n", | |
" | \n", | |
" | valid_metrics = ['chebyshev', 'euclidean', 'cityblock', 'manhattan', '...\n", | |
" \n", | |
" class KNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" | Classifier implementing the k-nearest neighbors vote.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | n_neighbors : int, optional (default = 5)\n", | |
" | Number of neighbors to use by default for :meth:`kneighbors` queries.\n", | |
" | \n", | |
" | weights : str or callable\n", | |
" | weight function used in prediction. Possible values:\n", | |
" | \n", | |
" | - 'uniform' : uniform weights. All points in each neighborhood\n", | |
" | are weighted equally.\n", | |
" | - 'distance' : weight points by the inverse of their distance.\n", | |
" | in this case, closer neighbors of a query point will have a\n", | |
" | greater influence than neighbors which are further away.\n", | |
" | - [callable] : a user-defined function which accepts an\n", | |
" | array of distances, and returns an array of the same shape\n", | |
" | containing the weights.\n", | |
" | \n", | |
" | Uniform weights are used by default.\n", | |
" | \n", | |
" | algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional\n", | |
" | Algorithm used to compute the nearest neighbors:\n", | |
" | \n", | |
" | - 'ball_tree' will use :class:`BallTree`\n", | |
" | - 'kd_tree' will use :class:`KDTree`\n", | |
" | - 'brute' will use a brute-force search.\n", | |
" | - 'auto' will attempt to decide the most appropriate algorithm\n", | |
" | based on the values passed to :meth:`fit` method.\n", | |
" | \n", | |
" | Note: fitting on sparse input will override the setting of\n", | |
" | this parameter, using brute force.\n", | |
" | \n", | |
" | leaf_size : int, optional (default = 30)\n", | |
" | Leaf size passed to BallTree or KDTree. This can affect the\n", | |
" | speed of the construction and query, as well as the memory\n", | |
" | required to store the tree. The optimal value depends on the\n", | |
" | nature of the problem.\n", | |
" | \n", | |
" | metric : string or DistanceMetric object (default = 'minkowski')\n", | |
" | the distance metric to use for the tree. The default metric is\n", | |
" | minkowski, and with p=2 is equivalent to the standard Euclidean\n", | |
" | metric. See the documentation of the DistanceMetric class for a\n", | |
" | list of available metrics.\n", | |
" | \n", | |
" | p : integer, optional (default = 2)\n", | |
" | Power parameter for the Minkowski metric. When p = 1, this is\n", | |
" | equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" | (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" | \n", | |
" | metric_params: dict, optional (default = None)\n", | |
" | additional keyword arguments for the metric function.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [1], [2], [3]]\n", | |
" | >>> y = [0, 0, 1, 1]\n", | |
" | >>> from sklearn.neighbors import KNeighborsClassifier\n", | |
" | >>> neigh = KNeighborsClassifier(n_neighbors=3)\n", | |
" | >>> neigh.fit(X, y) # doctest: +ELLIPSIS\n", | |
" | KNeighborsClassifier(...)\n", | |
" | >>> print(neigh.predict([[1.1]]))\n", | |
" | [0]\n", | |
" | >>> print(neigh.predict_proba([[0.9]]))\n", | |
" | [[ 0.66666667 0.33333333]]\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | RadiusNeighborsClassifier\n", | |
" | KNeighborsRegressor\n", | |
" | RadiusNeighborsRegressor\n", | |
" | NearestNeighbors\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n", | |
" | for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n", | |
" | \n", | |
" | .. warning::\n", | |
" | \n", | |
" | Regarding the Nearest Neighbors algorithms, if it is found that two\n", | |
" | neighbors, neighbor `k+1` and `k`, have identical distances but\n", | |
" | different labels, the results will depend on the ordering of the\n", | |
" | training data.\n", | |
" | \n", | |
" | http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | KNeighborsClassifier\n", | |
" | sklearn.neighbors.base.NeighborsBase\n", | |
" | abc.NewBase\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.neighbors.base.KNeighborsMixin\n", | |
" | sklearn.neighbors.base.SupervisedIntegerMixin\n", | |
" | sklearn.base.ClassifierMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, **kwargs)\n", | |
" | \n", | |
" | predict(self, X)\n", | |
" | Predict the class labels for the provided data\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array of shape [n_samples, n_features]\n", | |
" | A 2-D array representing the test points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | y : array of shape [n_samples] or [n_samples, n_outputs]\n", | |
" | Class labels for each data sample.\n", | |
" | \n", | |
" | predict_proba(self, X)\n", | |
" | Return probability estimates for the test data X.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array, shape = (n_samples, n_features)\n", | |
" | A 2-D array representing the test points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | p : array of shape = [n_samples, n_classes], or a list of n_outputs\n", | |
" | of such arrays if n_outputs > 1.\n", | |
" | The class probabilities of the input samples. Classes are ordered\n", | |
" | by lexicographic order.\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __abstractmethods__ = frozenset([])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The former have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.KNeighborsMixin:\n", | |
" | \n", | |
" | kneighbors(self, X=None, n_neighbors=None, return_distance=True)\n", | |
" | Finds the K-neighbors of a point.\n", | |
" | \n", | |
" | Returns indices of and distances to the neighbors of each point.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors to get (default is the value\n", | |
" | passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional. Defaults to True.\n", | |
" | If False, distances will not be returned\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array\n", | |
" | Array representing the lengths to points, only present if\n", | |
" | return_distance=True\n", | |
" | \n", | |
" | ind : array\n", | |
" | Indices of the nearest points in the population matrix.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | In the following example, we construct a NearestNeighbors\n", | |
" | class from an array representing our data set and ask who's\n", | |
" | the closest point to [1,1,1]\n", | |
" | \n", | |
" | >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=1)\n", | |
" | >>> neigh.fit(samples) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> print(neigh.kneighbors([1., 1., 1.])) # doctest: +ELLIPSIS\n", | |
" | (array([[ 0.5]]), array([[2]]...))\n", | |
" | \n", | |
" | As you can see, it returns [[0.5]], and [[2]], which means that the\n", | |
" | element is at distance 0.5 and is the third element of samples\n", | |
" | (indexes start at 0). You can also query for multiple points:\n", | |
" | \n", | |
" | >>> X = [[0., 1., 0.], [1., 0., 1.]]\n", | |
" | >>> neigh.kneighbors(X, return_distance=False) # doctest: +ELLIPSIS\n", | |
" | array([[1],\n", | |
" | [2]]...)\n", | |
" | \n", | |
" | kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of k-Neighbors for points in X\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors for each sample.\n", | |
" | (default is value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples_fit]\n", | |
" | n_samples_fit is the number of samples in the fitted data\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=2)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.kneighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 1.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | NearestNeighbors.radius_neighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.SupervisedIntegerMixin:\n", | |
" | \n", | |
" | fit(self, X, y)\n", | |
" | Fit the model using X as training data and y as target values\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : {array-like, sparse matrix, BallTree, KDTree}\n", | |
" | Training data. If array or matrix, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | y : {array-like, sparse matrix}\n", | |
" | Target values of shape = [n_samples] or [n_samples, n_outputs]\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.ClassifierMixin:\n", | |
" | \n", | |
" | score(self, X, y, sample_weight=None)\n", | |
" | Returns the mean accuracy on the given test data and labels.\n", | |
" | \n", | |
" | In multi-label classification, this is the subset accuracy\n", | |
" | which is a harsh metric since you require for each sample that\n", | |
" | each label set be correctly predicted.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = (n_samples, n_features)\n", | |
" | Test samples.\n", | |
" | \n", | |
" | y : array-like, shape = (n_samples) or (n_samples, n_outputs)\n", | |
" | True labels for X.\n", | |
" | \n", | |
" | sample_weight : array-like, shape = [n_samples], optional\n", | |
" | Sample weights.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | score : float\n", | |
" | Mean accuracy of self.predict(X) wrt. y.\n", | |
" \n", | |
" class KNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" | Regression based on k-nearest neighbors.\n", | |
" | \n", | |
" | The target is predicted by local interpolation of the targets\n", | |
" | associated with the nearest neighbors in the training set.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | n_neighbors : int, optional (default = 5)\n", | |
" | Number of neighbors to use by default for :meth:`kneighbors` queries.\n", | |
" | \n", | |
" | weights : str or callable\n", | |
" | weight function used in prediction. Possible values:\n", | |
" | \n", | |
" | - 'uniform' : uniform weights. All points in each neighborhood\n", | |
" | are weighted equally.\n", | |
" | - 'distance' : weight points by the inverse of their distance.\n", | |
" | in this case, closer neighbors of a query point will have a\n", | |
" | greater influence than neighbors which are further away.\n", | |
" | - [callable] : a user-defined function which accepts an\n", | |
" | array of distances, and returns an array of the same shape\n", | |
" | containing the weights.\n", | |
" | \n", | |
" | Uniform weights are used by default.\n", | |
" | \n", | |
" | algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional\n", | |
" | Algorithm used to compute the nearest neighbors:\n", | |
" | \n", | |
" | - 'ball_tree' will use :class:`BallTree`\n", | |
" | - 'kd_tree' will use :class:`KDTree`\n", | |
" | - 'brute' will use a brute-force search.\n", | |
" | - 'auto' will attempt to decide the most appropriate algorithm\n", | |
" | based on the values passed to :meth:`fit` method.\n", | |
" | \n", | |
" | Note: fitting on sparse input will override the setting of\n", | |
" | this parameter, using brute force.\n", | |
" | \n", | |
" | leaf_size : int, optional (default = 30)\n", | |
" | Leaf size passed to BallTree or KDTree. This can affect the\n", | |
" | speed of the construction and query, as well as the memory\n", | |
" | required to store the tree. The optimal value depends on the\n", | |
" | nature of the problem.\n", | |
" | \n", | |
" | metric : string or DistanceMetric object (default='minkowski')\n", | |
" | the distance metric to use for the tree. The default metric is\n", | |
" | minkowski, and with p=2 is equivalent to the standard Euclidean\n", | |
" | metric. See the documentation of the DistanceMetric class for a\n", | |
" | list of available metrics.\n", | |
" | \n", | |
" | p : integer, optional (default = 2)\n", | |
" | Power parameter for the Minkowski metric. When p = 1, this is\n", | |
" | equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" | (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" | \n", | |
" | metric_params: dict, optional (default = None)\n", | |
" | additional keyword arguments for the metric function.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [1], [2], [3]]\n", | |
" | >>> y = [0, 0, 1, 1]\n", | |
" | >>> from sklearn.neighbors import KNeighborsRegressor\n", | |
" | >>> neigh = KNeighborsRegressor(n_neighbors=2)\n", | |
" | >>> neigh.fit(X, y) # doctest: +ELLIPSIS\n", | |
" | KNeighborsRegressor(...)\n", | |
" | >>> print(neigh.predict([[1.5]]))\n", | |
" | [ 0.5]\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | NearestNeighbors\n", | |
" | RadiusNeighborsRegressor\n", | |
" | KNeighborsClassifier\n", | |
" | RadiusNeighborsClassifier\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n", | |
" | for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n", | |
" | \n", | |
" | .. warning::\n", | |
" | \n", | |
" | Regarding the Nearest Neighbors algorithms, if it is found that two\n", | |
" | neighbors, neighbor `k+1` and `k`, have identical distances but\n", | |
" | different labels, the results will depend on the ordering of the\n", | |
" | training data.\n", | |
" | \n", | |
" | http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | KNeighborsRegressor\n", | |
" | sklearn.neighbors.base.NeighborsBase\n", | |
" | abc.NewBase\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.neighbors.base.KNeighborsMixin\n", | |
" | sklearn.neighbors.base.SupervisedFloatMixin\n", | |
" | sklearn.base.RegressorMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, **kwargs)\n", | |
" | \n", | |
" | predict(self, X)\n", | |
" | Predict the target for the provided data\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array or matrix, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | y : array of float, shape = [n_samples] or [n_samples, n_outputs]\n", | |
" | Target values\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __abstractmethods__ = frozenset([])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The latter have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.KNeighborsMixin:\n", | |
" | \n", | |
" | kneighbors(self, X=None, n_neighbors=None, return_distance=True)\n", | |
" | Finds the K-neighbors of a point.\n", | |
" | \n", | |
" | Returns indices of and distances to the neighbors of each point.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors to get (default is the value\n", | |
" | passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional. Defaults to True.\n", | |
" | If False, distances will not be returned\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array\n", | |
" | Array representing the lengths to points, only present if\n", | |
" | return_distance=True\n", | |
" | \n", | |
" | ind : array\n", | |
" | Indices of the nearest points in the population matrix.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | In the following example, we construct a NearestNeighbors\n", | |
" | class from an array representing our data set and ask who's\n", | |
" | the closest point to [1,1,1]\n", | |
" | \n", | |
" | >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=1)\n", | |
" | >>> neigh.fit(samples) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> print(neigh.kneighbors([1., 1., 1.])) # doctest: +ELLIPSIS\n", | |
" | (array([[ 0.5]]), array([[2]]...))\n", | |
" | \n", | |
" | As you can see, it returns [[0.5]], and [[2]], which means that the\n", | |
" | element is at distance 0.5 and is the third element of samples\n", | |
" | (indexes start at 0). You can also query for multiple points:\n", | |
" | \n", | |
" | >>> X = [[0., 1., 0.], [1., 0., 1.]]\n", | |
" | >>> neigh.kneighbors(X, return_distance=False) # doctest: +ELLIPSIS\n", | |
" | array([[1],\n", | |
" | [2]]...)\n", | |
" | \n", | |
" | kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of k-Neighbors for points in X\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors for each sample.\n", | |
" | (default is value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples_fit]\n", | |
" | n_samples_fit is the number of samples in the fitted data\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=2)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.kneighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 1.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | NearestNeighbors.radius_neighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.SupervisedFloatMixin:\n", | |
" | \n", | |
" | fit(self, X, y)\n", | |
" | Fit the model using X as training data and y as target values\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : {array-like, sparse matrix, BallTree, KDTree}\n", | |
" | Training data. If array or matrix, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | y : {array-like, sparse matrix}\n", | |
" | Target values, array of float values, shape = [n_samples]\n", | |
" | or [n_samples, n_outputs]\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.RegressorMixin:\n", | |
" | \n", | |
" | score(self, X, y, sample_weight=None)\n", | |
" | Returns the coefficient of determination R^2 of the prediction.\n", | |
" | \n", | |
" | The coefficient R^2 is defined as (1 - u/v), where u is the residual\n", | |
" | sum of squares ((y_true - y_pred) ** 2).sum() and v is the total\n", | |
" | sum of squares ((y_true - y_true.mean()) ** 2).sum().\n", | |
" | Best possible score is 1.0, lower values are worse.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = (n_samples, n_features)\n", | |
" | Test samples.\n", | |
" | \n", | |
" | y : array-like, shape = (n_samples) or (n_samples, n_outputs)\n", | |
" | True values for X.\n", | |
" | \n", | |
" | sample_weight : array-like, shape = [n_samples], optional\n", | |
" | Sample weights.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | score : float\n", | |
" | R^2 of self.predict(X) wrt. y.\n", | |
" \n", | |
" class KernelDensity(sklearn.base.BaseEstimator)\n", | |
" | Kernel Density Estimation\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | bandwidth : float\n", | |
" | The bandwidth of the kernel.\n", | |
" | \n", | |
" | algorithm : string\n", | |
" | The tree algorithm to use. Valid options are\n", | |
" | ['kd_tree'|'ball_tree'|'auto']. Default is 'auto'.\n", | |
" | \n", | |
" | kernel : string\n", | |
" | The kernel to use. Valid kernels are\n", | |
" | ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine']\n", | |
" | Default is 'gaussian'.\n", | |
" | \n", | |
" | metric : string\n", | |
" | The distance metric to use. Note that not all metrics are\n", | |
" | valid with all algorithms. Refer to the documentation of\n", | |
" | :class:`BallTree` and :class:`KDTree` for a description of\n", | |
" | available algorithms. Note that the normalization of the density\n", | |
" | output is correct only for the Euclidean distance metric. Default\n", | |
" | is 'euclidean'.\n", | |
" | \n", | |
" | atol : float\n", | |
" | The desired absolute tolerance of the result. A larger tolerance will\n", | |
" | generally lead to faster execution. Default is 0.\n", | |
" | \n", | |
" | rtol : float\n", | |
" | The desired relative tolerance of the result. A larger tolerance will\n", | |
" | generally lead to faster execution. Default is 0.\n", | |
" | \n", | |
" | breadth_first : boolean\n", | |
" | If true (default), use a breadth-first approach to the problem.\n", | |
" | Otherwise use a depth-first approach.\n", | |
" | \n", | |
" | leaf_size : int\n", | |
" | Specify the leaf size of the underlying tree. See :class:`BallTree`\n", | |
" | or :class:`KDTree` for details. Default is 40.\n", | |
" | \n", | |
" | metric_params : dict\n", | |
" | Additional parameters to be passed to the tree for use with the\n", | |
" | metric. For more information, see the documentation of\n", | |
" | :class:`BallTree` or :class:`KDTree`.\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | KernelDensity\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, bandwidth=1.0, algorithm='auto', kernel='gaussian', metric='euclidean', atol=0, rtol=0, breadth_first=True, leaf_size=40, metric_params=None)\n", | |
" | \n", | |
" | fit(self, X, y=None)\n", | |
" | Fit the Kernel Density model on the data.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like, shape (n_samples, n_features)\n", | |
" | List of n_features-dimensional data points. Each row\n", | |
" | corresponds to a single data point.\n", | |
" | \n", | |
" | sample(self, n_samples=1, random_state=None)\n", | |
" | Generate random samples from the model.\n", | |
" | \n", | |
" | Currently, this is implemented only for gaussian and tophat kernels.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | n_samples : int, optional\n", | |
" | Number of samples to generate. Defaults to 1.\n", | |
" | \n", | |
" | random_state : RandomState or an int seed (None by default)\n", | |
" | A random number generator instance.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | X : array_like, shape (n_samples, n_features)\n", | |
" | List of samples.\n", | |
" | \n", | |
" | score(self, X, y=None)\n", | |
" | Compute the total log probability under the model.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like, shape (n_samples, n_features)\n", | |
" | List of n_features-dimensional data points. Each row\n", | |
" | corresponds to a single data point.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | logprob : float\n", | |
" | Total log-likelihood of the data in X.\n", | |
" | \n", | |
" | score_samples(self, X)\n", | |
" | Evaluate the density model on the data.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like, shape (n_samples, n_features)\n", | |
" | An array of points to query. Last dimension should match dimension\n", | |
" | of training data (n_features).\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | density : ndarray, shape (n_samples,)\n", | |
" | The array of log(density) evaluations.\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The latter have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" \n", | |
" class LSHForest(sklearn.base.BaseEstimator, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin)\n", | |
" | Performs approximate nearest neighbor search using LSH forest.\n", | |
" | \n", | |
" | LSH Forest: Locality Sensitive Hashing forest [1] is an alternative\n", | |
" | method for vanilla approximate nearest neighbor search methods.\n", | |
" | LSH forest data structure has been implemented using sorted\n", | |
" | arrays and binary search and 32 bit fixed-length hashes.\n", | |
" | Random projection is used as the hash family which approximates\n", | |
" | cosine distance.\n", | |
" | \n", | |
" | The cosine distance is defined as ``1 - cosine_similarity``: the lowest\n", | |
" | value is 0 (identical point) but it is bounded above by 2 for the farthest\n", | |
" | points. Its value does not depend on the norm of the vector points but\n", | |
" | only on their relative angles.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | \n", | |
" | n_estimators : int (default = 10)\n", | |
" | Number of trees in the LSH Forest.\n", | |
" | \n", | |
" | min_hash_match : int (default = 4)\n", | |
" | lowest hash length to be searched when candidate selection is\n", | |
" | performed for nearest neighbors.\n", | |
" | \n", | |
" | n_candidates : int (default = 50)\n", | |
" | Minimum number of candidates evaluated per estimator, assuming enough\n", | |
" | items meet the `min_hash_match` constraint.\n", | |
" | \n", | |
" | n_neighbors : int (default = 5)\n", | |
" | Number of neighbors to be returned from query function when\n", | |
" | it is not provided to the :meth:`kneighbors` method.\n", | |
" | \n", | |
" | radius : float, optional (default = 1.0)\n", | |
" | Radius from the data point to its neighbors. This is the parameter\n", | |
" | space to use by default for the :meth:`radius_neighbors` queries.\n", | |
" | \n", | |
" | radius_cutoff_ratio : float, optional (default = 0.9)\n", | |
" | A value ranges from 0 to 1. Radius neighbors will be searched until\n", | |
" | the ratio between total neighbors within the radius and the total\n", | |
" | candidates becomes less than this value unless it is terminated by\n", | |
" | hash length reaching `min_hash_match`.\n", | |
" | \n", | |
" | random_state : int, RandomState instance or None, optional (default=None)\n", | |
" | If int, random_state is the seed used by the random number generator;\n", | |
" | If RandomState instance, random_state is the random number generator;\n", | |
" | If None, the random number generator is the RandomState instance used\n", | |
" | by `np.random`.\n", | |
" | \n", | |
" | Attributes\n", | |
" | ----------\n", | |
" | \n", | |
" | hash_functions_ : list of GaussianRandomProjectionHash objects\n", | |
" | Hash function g(p,x) for a tree is an array of 32 randomly generated\n", | |
" | float arrays with the same dimension as the data set. This array is\n", | |
" | stored in GaussianRandomProjectionHash object and can be obtained\n", | |
" | from ``components_`` attribute.\n", | |
" | \n", | |
" | trees_ : array, shape (n_estimators, n_samples)\n", | |
" | Each tree (corresponding to a hash function) contains an array of\n", | |
" | sorted hashed values. The array representation may change in future\n", | |
" | versions.\n", | |
" | \n", | |
" | original_indices_ : array, shape (n_estimators, n_samples)\n", | |
" | Original indices of sorted hashed values in the fitted index.\n", | |
" | \n", | |
" | References\n", | |
" | ----------\n", | |
" | \n", | |
" | .. [1] M. Bawa, T. Condie and P. Ganesan, \"LSH Forest: Self-Tuning\n", | |
" | Indexes for Similarity Search\", WWW '05 Proceedings of the\n", | |
" | 14th international conference on World Wide Web, 651-660,\n", | |
" | 2005.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> from sklearn.neighbors import LSHForest\n", | |
" | \n", | |
" | >>> X_train = [[5, 5, 2], [21, 5, 5], [1, 1, 1], [8, 9, 1], [6, 10, 2]]\n", | |
" | >>> X_test = [[9, 1, 6], [3, 1, 10], [7, 10, 3]]\n", | |
" | >>> lshf = LSHForest()\n", | |
" | >>> lshf.fit(X_train) # doctest: +NORMALIZE_WHITESPACE\n", | |
" | LSHForest(min_hash_match=4, n_candidates=50, n_estimators=10,\n", | |
" | n_neighbors=5, radius=1.0, radius_cutoff_ratio=0.9,\n", | |
" | random_state=None)\n", | |
" | >>> distances, indices = lshf.kneighbors(X_test, n_neighbors=2)\n", | |
" | >>> distances # doctest: +ELLIPSIS\n", | |
" | array([[ 0.069..., 0.149...],\n", | |
" | [ 0.229..., 0.481...],\n", | |
" | [ 0.004..., 0.014...]])\n", | |
" | >>> indices\n", | |
" | array([[1, 2],\n", | |
" | [2, 0],\n", | |
" | [4, 0]])\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | LSHForest\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.neighbors.base.KNeighborsMixin\n", | |
" | sklearn.neighbors.base.RadiusNeighborsMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, n_estimators=10, radius=1.0, n_candidates=50, n_neighbors=5, min_hash_match=4, radius_cutoff_ratio=0.9, random_state=None)\n", | |
" | \n", | |
" | fit(self, X, y=None)\n", | |
" | Fit the LSH forest on the data.\n", | |
" | \n", | |
" | This creates binary hashes of input data points by getting the\n", | |
" | dot product of input points and hash_function then\n", | |
" | transforming the projection into a binary string array based\n", | |
" | on the sign (positive/negative) of the projection.\n", | |
" | A sorted array of binary hashes is created.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like or sparse (CSR) matrix, shape (n_samples, n_features)\n", | |
" | List of n_features-dimensional data points. Each row\n", | |
" | corresponds to a single data point.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self : object\n", | |
" | Returns self.\n", | |
" | \n", | |
" | kneighbors(self, X, n_neighbors=None, return_distance=True)\n", | |
" | Returns n_neighbors of approximate nearest neighbors.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like or sparse (CSR) matrix, shape (n_samples, n_features)\n", | |
" | List of n_features-dimensional data points. Each row\n", | |
" | corresponds to a single query.\n", | |
" | \n", | |
" | n_neighbors : int, optional (default = None)\n", | |
" | Number of neighbors required. If not provided, this will\n", | |
" | return the number specified at the initialization.\n", | |
" | \n", | |
" | return_distance : boolean, optional (default = True)\n", | |
" | Returns the distances of neighbors if set to True.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array, shape (n_samples, n_neighbors)\n", | |
" | Array representing the cosine distances to each point,\n", | |
" | only present if return_distance=True.\n", | |
" | \n", | |
" | ind : array, shape (n_samples, n_neighbors)\n", | |
" | Indices of the approximate nearest points in the population\n", | |
" | matrix.\n", | |
" | \n", | |
" | partial_fit(self, X, y=None)\n", | |
" | Inserts new data into the already fitted LSH Forest.\n", | |
" | Cost is proportional to new total size, so additions\n", | |
" | should be batched.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like or sparse (CSR) matrix, shape (n_samples, n_features)\n", | |
" | New data point to be inserted into the LSH Forest.\n", | |
" | \n", | |
" | radius_neighbors(self, X, radius=None, return_distance=True)\n", | |
" | Finds the neighbors within a given radius of a point or points.\n", | |
" | \n", | |
" | Return the indices and distances of some points from the dataset\n", | |
" | lying in a ball with size ``radius`` around the points of the query\n", | |
" | array. Points lying on the boundary are included in the results.\n", | |
" | \n", | |
" | The result points are *not* necessarily sorted by distance to their\n", | |
" | query point.\n", | |
" | \n", | |
" | LSH Forest being an approximate method, some true neighbors from the\n", | |
" | indexed dataset might be missing from the results.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array_like or sparse (CSR) matrix, shape (n_samples, n_features)\n", | |
" | List of n_features-dimensional data points. Each row\n", | |
" | corresponds to a single query.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Limiting distance of neighbors to return.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional (default = True)\n", | |
" | Returns the distances of neighbors if set to True.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array, shape (n_samples,) of arrays\n", | |
" | Each element is an array representing the cosine distances\n", | |
" | to some points found within ``radius`` of the respective query.\n", | |
" | Only present if ``return_distance=True``.\n", | |
" | \n", | |
" | ind : array, shape (n_samples,) of arrays\n", | |
" | Each element is an array of indices for neighbors within ``radius``\n", | |
" | of the respective query.\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The former have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.KNeighborsMixin:\n", | |
" | \n", | |
" | kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of k-Neighbors for points in X\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors for each sample.\n", | |
" | (default is value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples_fit]\n", | |
" | n_samples_fit is the number of samples in the fitted data\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=2)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.kneighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 1.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | NearestNeighbors.radius_neighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.RadiusNeighborsMixin:\n", | |
" | \n", | |
" | radius_neighbors_graph(self, X=None, radius=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of Neighbors for points in X\n", | |
" | \n", | |
" | Neighborhoods are restricted to the points at a distance lower than\n", | |
" | radius.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features], optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Radius of neighborhoods.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.5)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.radius_neighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 0.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | kneighbors_graph\n", | |
" \n", | |
" class NearestCentroid(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin)\n", | |
" | Nearest centroid classifier.\n", | |
" | \n", | |
" | Each class is represented by its centroid, with test samples classified to\n", | |
" | the class with the nearest centroid.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | metric: string, or callable\n", | |
" | The metric to use when calculating distance between instances in a\n", | |
" | feature array. If metric is a string or callable, it must be one of\n", | |
" | the options allowed by metrics.pairwise.pairwise_distances for its\n", | |
" | metric parameter.\n", | |
" | The centroid for the samples corresponding to each class is the point\n", | |
" | from which the sum of the distances (according to the metric) of all\n", | |
" | samples that belong to that particular class are minimized.\n", | |
" | If the \"manhattan\" metric is provided, this centroid is the median and\n", | |
" | for all other metrics, the centroid is now set to be the mean.\n", | |
" | \n", | |
" | shrink_threshold : float, optional (default = None)\n", | |
" | Threshold for shrinking centroids to remove features.\n", | |
" | \n", | |
" | Attributes\n", | |
" | ----------\n", | |
" | centroids_ : array-like, shape = [n_classes, n_features]\n", | |
" | Centroid of each class\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> from sklearn.neighbors.nearest_centroid import NearestCentroid\n", | |
" | >>> import numpy as np\n", | |
" | >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n", | |
" | >>> y = np.array([1, 1, 1, 2, 2, 2])\n", | |
" | >>> clf = NearestCentroid()\n", | |
" | >>> clf.fit(X, y)\n", | |
" | NearestCentroid(metric='euclidean', shrink_threshold=None)\n", | |
" | >>> print(clf.predict([[-0.8, -1]]))\n", | |
" | [1]\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | sklearn.neighbors.KNeighborsClassifier: nearest neighbors classifier\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | When used for text classification with tf-idf vectors, this classifier is\n", | |
" | also known as the Rocchio classifier.\n", | |
" | \n", | |
" | References\n", | |
" | ----------\n", | |
" | Tibshirani, R., Hastie, T., Narasimhan, B., & Chu, G. (2002). Diagnosis of\n", | |
" | multiple cancer types by shrunken centroids of gene expression. Proceedings\n", | |
" | of the National Academy of Sciences of the United States of America,\n", | |
" | 99(10), 6567-6572. The National Academy of Sciences.\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | NearestCentroid\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.base.ClassifierMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, metric='euclidean', shrink_threshold=None)\n", | |
" | \n", | |
" | fit(self, X, y)\n", | |
" | Fit the NearestCentroid model according to the given training data.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : {array-like, sparse matrix}, shape = [n_samples, n_features]\n", | |
" | Training vector, where n_samples is the number of samples and\n", | |
" | n_features is the number of features.\n", | |
" | Note that centroid shrinking cannot be used with sparse matrices.\n", | |
" | y : array, shape = [n_samples]\n", | |
" | Target values (integers)\n", | |
" | \n", | |
" | predict(self, X)\n", | |
" | Perform classification on an array of test vectors X.\n", | |
" | \n", | |
" | The predicted class C for each sample in X is returned.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | C : array, shape = [n_samples]\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | If the metric constructor parameter is \"precomputed\", X is assumed to\n", | |
" | be the distance matrix between the data to be predicted and\n", | |
" | ``self.centroids_``.\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The former have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.ClassifierMixin:\n", | |
" | \n", | |
" | score(self, X, y, sample_weight=None)\n", | |
" | Returns the mean accuracy on the given test data and labels.\n", | |
" | \n", | |
" | In multi-label classification, this is the subset accuracy\n", | |
" | which is a harsh metric since you require for each sample that\n", | |
" | each label set be correctly predicted.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = (n_samples, n_features)\n", | |
" | Test samples.\n", | |
" | \n", | |
" | y : array-like, shape = (n_samples) or (n_samples, n_outputs)\n", | |
" | True labels for X.\n", | |
" | \n", | |
" | sample_weight : array-like, shape = [n_samples], optional\n", | |
" | Sample weights.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | score : float\n", | |
" | Mean accuracy of self.predict(X) wrt. y.\n", | |
" \n", | |
" class NearestNeighbors(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.KNeighborsMixin, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.UnsupervisedMixin)\n", | |
" | Unsupervised learner for implementing neighbor searches.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | n_neighbors : int, optional (default = 5)\n", | |
" | Number of neighbors to use by default for :meth:`kneighbors` queries.\n", | |
" | \n", | |
" | radius : float, optional (default = 1.0)\n", | |
" | Range of parameter space to use by default for :meth:`radius_neighbors`\n", | |
" | queries.\n", | |
" | \n", | |
" | algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional\n", | |
" | Algorithm used to compute the nearest neighbors:\n", | |
" | \n", | |
" | - 'ball_tree' will use :class:`BallTree`\n", | |
" | - 'kd_tree' will use :class:`KDTree`\n", | |
" | - 'brute' will use a brute-force search.\n", | |
" | - 'auto' will attempt to decide the most appropriate algorithm\n", | |
" | based on the values passed to :meth:`fit` method.\n", | |
" | \n", | |
" | Note: fitting on sparse input will override the setting of\n", | |
" | this parameter, using brute force.\n", | |
" | \n", | |
" | leaf_size : int, optional (default = 30)\n", | |
" | Leaf size passed to BallTree or KDTree. This can affect the\n", | |
" | speed of the construction and query, as well as the memory\n", | |
" | required to store the tree. The optimal value depends on the\n", | |
" | nature of the problem.\n", | |
" | \n", | |
" | p: integer, optional (default = 2)\n", | |
" | Parameter for the Minkowski metric from\n", | |
" | sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is\n", | |
" | equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" | (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" | \n", | |
" | metric : string or callable, default 'minkowski'\n", | |
" | metric to use for distance computation. Any metric from scikit-learn\n", | |
" | or scipy.spatial.distance can be used.\n", | |
" | \n", | |
" | If metric is a callable function, it is called on each\n", | |
" | pair of instances (rows) and the resulting value recorded. The callable\n", | |
" | should take two arrays as input and return one value indicating the\n", | |
" | distance between them. This works for Scipy's metrics, but is less\n", | |
" | efficient than passing the metric name as a string.\n", | |
" | \n", | |
" | Distance matrices are not supported.\n", | |
" | \n", | |
" | Valid values for metric are:\n", | |
" | \n", | |
" | - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n", | |
" | 'manhattan']\n", | |
" | \n", | |
" | - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n", | |
" | 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n", | |
" | 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',\n", | |
" | 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath',\n", | |
" | 'sqeuclidean', 'yule']\n", | |
" | \n", | |
" | See the documentation for scipy.spatial.distance for details on these\n", | |
" | metrics.\n", | |
" | \n", | |
" | metric_params: dict, optional (default = None)\n", | |
" | additional keyword arguments for the metric function.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> import numpy as np\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]\n", | |
" | \n", | |
" | >>> neigh = NearestNeighbors(2, 0.4)\n", | |
" | >>> neigh.fit(samples) #doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(...)\n", | |
" | \n", | |
" | >>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)\n", | |
" | ... #doctest: +ELLIPSIS\n", | |
" | array([[2, 0]]...)\n", | |
" | \n", | |
" | >>> rng = neigh.radius_neighbors([0, 0, 1.3], 0.4, return_distance=False)\n", | |
" | >>> np.asarray(rng[0][0])\n", | |
" | array(2)\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | KNeighborsClassifier\n", | |
" | RadiusNeighborsClassifier\n", | |
" | KNeighborsRegressor\n", | |
" | RadiusNeighborsRegressor\n", | |
" | BallTree\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n", | |
" | for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n", | |
" | \n", | |
" | http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | NearestNeighbors\n", | |
" | sklearn.neighbors.base.NeighborsBase\n", | |
" | abc.NewBase\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.neighbors.base.KNeighborsMixin\n", | |
" | sklearn.neighbors.base.RadiusNeighborsMixin\n", | |
" | sklearn.neighbors.base.UnsupervisedMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, n_neighbors=5, radius=1.0, algorithm='auto', leaf_size=30, metric='minkowski', p=2, metric_params=None, **kwargs)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __abstractmethods__ = frozenset([])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The former have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.KNeighborsMixin:\n", | |
" | \n", | |
" | kneighbors(self, X=None, n_neighbors=None, return_distance=True)\n", | |
" | Finds the K-neighbors of a point.\n", | |
" | \n", | |
" | Returns distance\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors to get (default is the value\n", | |
" | passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional. Defaults to True.\n", | |
" | If False, distances will not be returned\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array\n", | |
" | Array representing the lengths to points, only present if\n", | |
" | return_distance=True\n", | |
" | \n", | |
" | ind : array\n", | |
" | Indices of the nearest points in the population matrix.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | In the following example, we construct a NearestNeighbors\n", | |
" | instance from an array representing our data set and ask who's\n", | |
" | the closest point to [1,1,1]\n", | |
" | \n", | |
" | >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=1)\n", | |
" | >>> neigh.fit(samples) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> print(neigh.kneighbors([1., 1., 1.])) # doctest: +ELLIPSIS\n", | |
" | (array([[ 0.5]]), array([[2]]...))\n", | |
" | \n", | |
" | As you can see, it returns [[0.5]], and [[2]], which means that the\n", | |
" | element is at distance 0.5 and is the third element of samples\n", | |
" | (indexes start at 0). You can also query for multiple points:\n", | |
" | \n", | |
" | >>> X = [[0., 1., 0.], [1., 0., 1.]]\n", | |
" | >>> neigh.kneighbors(X, return_distance=False) # doctest: +ELLIPSIS\n", | |
" | array([[1],\n", | |
" | [2]]...)\n", | |
" | \n", | |
" | kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of k-Neighbors for points in X\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, last dimension same as that of fit data, optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | n_neighbors : int\n", | |
" | Number of neighbors for each sample.\n", | |
" | (default is value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples_fit]\n", | |
" | n_samples_fit is the number of samples in the fitted data\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(n_neighbors=2)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.kneighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 1.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | NearestNeighbors.radius_neighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.RadiusNeighborsMixin:\n", | |
" | \n", | |
" | radius_neighbors(self, X=None, radius=None, return_distance=True)\n", | |
" | Finds the neighbors within a given radius of a point or points.\n", | |
" | \n", | |
" | Return the indices and distances of each point from the dataset\n", | |
" | lying in a ball with size ``radius`` around the points of the query\n", | |
" | array. Points lying on the boundary are included in the results.\n", | |
" | \n", | |
" | The result points are *not* necessarily sorted by distance to their\n", | |
" | query point.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, (n_samples, n_features), optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Limiting distance of neighbors to return.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional. Defaults to True.\n", | |
" | If False, distances will not be returned\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array, shape (n_samples,) of arrays\n", | |
" | Array representing the distances to each point, only present if\n", | |
" | return_distance=True. The distance values are computed according\n", | |
" | to the ``metric`` constructor parameter.\n", | |
" | \n", | |
" | ind : array, shape (n_samples,) of arrays\n", | |
" | An array of arrays of indices of the approximate nearest points\n", | |
" | from the population matrix that lie within a ball of size\n", | |
" | ``radius`` around the query points.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | In the following example, we construct a NearestNeighbors\n", | |
" | instance from an array representing our data set and ask which\n", | |
" | points lie within a radius of 1.6 of [1, 1, 1]:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.6)\n", | |
" | >>> neigh.fit(samples) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> rng = neigh.radius_neighbors([1., 1., 1.])\n", | |
" | >>> print(np.asarray(rng[0][0])) # doctest: +ELLIPSIS\n", | |
" | [ 1.5 0.5]\n", | |
" | >>> print(np.asarray(rng[1][0])) # doctest: +ELLIPSIS\n", | |
" | [1 2]\n", | |
" | \n", | |
" | The first array returned contains the distances to all points which\n", | |
" | are closer than 1.6, while the second array returned contains their\n", | |
" | indices. In general, multiple points can be queried at the same time.\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | Because the number of neighbors of each point is not necessarily\n", | |
" | equal, the results for multiple query points cannot be fit in a\n", | |
" | standard data array.\n", | |
" | For efficiency, `radius_neighbors` returns arrays of objects, where\n", | |
" | each object is a 1D array of indices or distances.\n", | |
" | \n", | |
" | radius_neighbors_graph(self, X=None, radius=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of Neighbors for points in X\n", | |
" | \n", | |
" | Neighborhoods are restricted to the points at a distance lower than\n", | |
" | radius.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features], optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Radius of neighborhoods.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.5)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.radius_neighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 0.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | kneighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.UnsupervisedMixin:\n", | |
" | \n", | |
" | fit(self, X, y=None)\n", | |
" | Fit the model using X as training data\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : {array-like, sparse matrix, BallTree, KDTree}\n", | |
" | Training data. If array or matrix, shape = [n_samples, n_features]\n", | |
" \n", | |
" class RadiusNeighborsClassifier(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedIntegerMixin, sklearn.base.ClassifierMixin)\n", | |
" | Classifier implementing a vote among neighbors within a given radius\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | radius : float, optional (default = 1.0)\n", | |
" | Range of parameter space to use by default for :meth:`radius_neighbors`\n", | |
" | queries.\n", | |
" | \n", | |
" | weights : str or callable\n", | |
" | weight function used in prediction. Possible values:\n", | |
" | \n", | |
" | - 'uniform' : uniform weights. All points in each neighborhood\n", | |
" | are weighted equally.\n", | |
" | - 'distance' : weight points by the inverse of their distance.\n", | |
" | in this case, closer neighbors of a query point will have a\n", | |
" | greater influence than neighbors which are further away.\n", | |
" | - [callable] : a user-defined function which accepts an\n", | |
" | array of distances, and returns an array of the same shape\n", | |
" | containing the weights.\n", | |
" | \n", | |
" | Uniform weights are used by default.\n", | |
" | \n", | |
" | algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional\n", | |
" | Algorithm used to compute the nearest neighbors:\n", | |
" | \n", | |
" | - 'ball_tree' will use :class:`BallTree`\n", | |
" | - 'kd_tree' will use :class:`KDTree`\n", | |
" | - 'brute' will use a brute-force search.\n", | |
" | - 'auto' will attempt to decide the most appropriate algorithm\n", | |
" | based on the values passed to :meth:`fit` method.\n", | |
" | \n", | |
" | Note: fitting on sparse input will override the setting of\n", | |
" | this parameter, using brute force.\n", | |
" | \n", | |
" | leaf_size : int, optional (default = 30)\n", | |
" | Leaf size passed to BallTree or KDTree. This can affect the\n", | |
" | speed of the construction and query, as well as the memory\n", | |
" | required to store the tree. The optimal value depends on the\n", | |
" | nature of the problem.\n", | |
" | \n", | |
" | metric : string or DistanceMetric object (default='minkowski')\n", | |
" | the distance metric to use for the tree. The default metric is\n", | |
" | minkowski, and with p=2 is equivalent to the standard Euclidean\n", | |
" | metric. See the documentation of the DistanceMetric class for a\n", | |
" | list of available metrics.\n", | |
" | \n", | |
" | p : integer, optional (default = 2)\n", | |
" | Power parameter for the Minkowski metric. When p = 1, this is\n", | |
" | equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" | (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" | \n", | |
" | outlier_label : int, optional (default = None)\n", | |
" | Label, which is given for outlier samples (samples with no\n", | |
" | neighbors on given radius).\n", | |
" | If set to None, ValueError is raised, when outlier is detected.\n", | |
" | \n", | |
" | metric_params: dict, optional (default = None)\n", | |
" | additional keyword arguments for the metric function.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [1], [2], [3]]\n", | |
" | >>> y = [0, 0, 1, 1]\n", | |
" | >>> from sklearn.neighbors import RadiusNeighborsClassifier\n", | |
" | >>> neigh = RadiusNeighborsClassifier(radius=1.0)\n", | |
" | >>> neigh.fit(X, y) # doctest: +ELLIPSIS\n", | |
" | RadiusNeighborsClassifier(...)\n", | |
" | >>> print(neigh.predict([[1.5]]))\n", | |
" | [0]\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | KNeighborsClassifier\n", | |
" | RadiusNeighborsRegressor\n", | |
" | KNeighborsRegressor\n", | |
" | NearestNeighbors\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n", | |
" | for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n", | |
" | \n", | |
" | http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | RadiusNeighborsClassifier\n", | |
" | sklearn.neighbors.base.NeighborsBase\n", | |
" | abc.NewBase\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.neighbors.base.RadiusNeighborsMixin\n", | |
" | sklearn.neighbors.base.SupervisedIntegerMixin\n", | |
" | sklearn.base.ClassifierMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, radius=1.0, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', outlier_label=None, metric_params=None, **kwargs)\n", | |
" | \n", | |
" | predict(self, X)\n", | |
" | Predict the class labels for the provided data\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array of shape [n_samples, n_features]\n", | |
" | A 2-D array representing the test points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | y : array of shape [n_samples] or [n_samples, n_outputs]\n", | |
" | Class labels for each data sample.\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __abstractmethods__ = frozenset([])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The former have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.RadiusNeighborsMixin:\n", | |
" | \n", | |
" | radius_neighbors(self, X=None, radius=None, return_distance=True)\n", | |
" | Finds the neighbors within a given radius of a point or points.\n", | |
" | \n", | |
" | Return the indices and distances of each point from the dataset\n", | |
" | lying in a ball with size ``radius`` around the points of the query\n", | |
" | array. Points lying on the boundary are included in the results.\n", | |
" | \n", | |
" | The result points are *not* necessarily sorted by distance to their\n", | |
" | query point.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, (n_samples, n_features), optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Limiting distance of neighbors to return.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional. Defaults to True.\n", | |
" | If False, distances will not be returned\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array, shape (n_samples,) of arrays\n", | |
" | Array representing the distances to each point, only present if\n", | |
" | return_distance=True. The distance values are computed according\n", | |
" | to the ``metric`` constructor parameter.\n", | |
" | \n", | |
" | ind : array, shape (n_samples,) of arrays\n", | |
" | An array of arrays of indices of the approximate nearest points\n", | |
" | from the population matrix that lie within a ball of size\n", | |
" | ``radius`` around the query points.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | In the following example, we construct a NeighborsClassifier\n", | |
" | class from an array representing our data set and ask who's\n", | |
" | the closest point to [1, 1, 1]:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.6)\n", | |
" | >>> neigh.fit(samples) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> rng = neigh.radius_neighbors([1., 1., 1.])\n", | |
" | >>> print(np.asarray(rng[0][0])) # doctest: +ELLIPSIS\n", | |
" | [ 1.5 0.5]\n", | |
" | >>> print(np.asarray(rng[1][0])) # doctest: +ELLIPSIS\n", | |
" | [1 2]\n", | |
" | \n", | |
" | The first array returned contains the distances to all points which\n", | |
" | are closer than 1.6, while the second array returned contains their\n", | |
" | indices. In general, multiple points can be queried at the same time.\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | Because the number of neighbors of each point is not necessarily\n", | |
" | equal, the results for multiple query points cannot be fit in a\n", | |
" | standard data array.\n", | |
" | For efficiency, `radius_neighbors` returns arrays of objects, where\n", | |
" | each object is a 1D array of indices or distances.\n", | |
" | \n", | |
" | radius_neighbors_graph(self, X=None, radius=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of Neighbors for points in X\n", | |
" | \n", | |
" | Neighborhoods are restricted the points at a distance lower than\n", | |
" | radius.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features], optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Radius of neighborhoods.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.5)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.radius_neighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 0.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | kneighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.SupervisedIntegerMixin:\n", | |
" | \n", | |
" | fit(self, X, y)\n", | |
" | Fit the model using X as training data and y as target values\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : {array-like, sparse matrix, BallTree, KDTree}\n", | |
" | Training data. If array or matrix, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | y : {array-like, sparse matrix}\n", | |
" | Target values of shape = [n_samples] or [n_samples, n_outputs]\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.ClassifierMixin:\n", | |
" | \n", | |
" | score(self, X, y, sample_weight=None)\n", | |
" | Returns the mean accuracy on the given test data and labels.\n", | |
" | \n", | |
" | In multi-label classification, this is the subset accuracy\n", | |
" | which is a harsh metric since you require for each sample that\n", | |
" | each label set be correctly predicted.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = (n_samples, n_features)\n", | |
" | Test samples.\n", | |
" | \n", | |
" | y : array-like, shape = (n_samples) or (n_samples, n_outputs)\n", | |
" | True labels for X.\n", | |
" | \n", | |
" | sample_weight : array-like, shape = [n_samples], optional\n", | |
" | Sample weights.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | score : float\n", | |
" | Mean accuracy of self.predict(X) wrt. y.\n", | |
" \n", | |
" class RadiusNeighborsRegressor(sklearn.neighbors.base.NeighborsBase, sklearn.neighbors.base.RadiusNeighborsMixin, sklearn.neighbors.base.SupervisedFloatMixin, sklearn.base.RegressorMixin)\n", | |
" | Regression based on neighbors within a fixed radius.\n", | |
" | \n", | |
" | The target is predicted by local interpolation of the targets\n", | |
" | associated of the nearest neighbors in the training set.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | radius : float, optional (default = 1.0)\n", | |
" | Range of parameter space to use by default for :meth`radius_neighbors`\n", | |
" | queries.\n", | |
" | \n", | |
" | weights : str or callable\n", | |
" | weight function used in prediction. Possible values:\n", | |
" | \n", | |
" | - 'uniform' : uniform weights. All points in each neighborhood\n", | |
" | are weighted equally.\n", | |
" | - 'distance' : weight points by the inverse of their distance.\n", | |
" | in this case, closer neighbors of a query point will have a\n", | |
" | greater influence than neighbors which are further away.\n", | |
" | - [callable] : a user-defined function which accepts an\n", | |
" | array of distances, and returns an array of the same shape\n", | |
" | containing the weights.\n", | |
" | \n", | |
" | Uniform weights are used by default.\n", | |
" | \n", | |
" | algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional\n", | |
" | Algorithm used to compute the nearest neighbors:\n", | |
" | \n", | |
" | - 'ball_tree' will use :class:`BallTree`\n", | |
" | - 'kd_tree' will use :class:`KDtree`\n", | |
" | - 'brute' will use a brute-force search.\n", | |
" | - 'auto' will attempt to decide the most appropriate algorithm\n", | |
" | based on the values passed to :meth:`fit` method.\n", | |
" | \n", | |
" | Note: fitting on sparse input will override the setting of\n", | |
" | this parameter, using brute force.\n", | |
" | \n", | |
" | leaf_size : int, optional (default = 30)\n", | |
" | Leaf size passed to BallTree or KDTree. This can affect the\n", | |
" | speed of the construction and query, as well as the memory\n", | |
" | required to store the tree. The optimal value depends on the\n", | |
" | nature of the problem.\n", | |
" | \n", | |
" | metric : string or DistanceMetric object (default='minkowski')\n", | |
" | the distance metric to use for the tree. The default metric is\n", | |
" | minkowski, and with p=2 is equivalent to the standard Euclidean\n", | |
" | metric. See the documentation of the DistanceMetric class for a\n", | |
" | list of available metrics.\n", | |
" | \n", | |
" | p : integer, optional (default = 2)\n", | |
" | Power parameter for the Minkowski metric. When p = 1, this is\n", | |
" | equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" | (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" | \n", | |
" | metric_params: dict, optional (default = None)\n", | |
" | additional keyword arguments for the metric function.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [1], [2], [3]]\n", | |
" | >>> y = [0, 0, 1, 1]\n", | |
" | >>> from sklearn.neighbors import RadiusNeighborsRegressor\n", | |
" | >>> neigh = RadiusNeighborsRegressor(radius=1.0)\n", | |
" | >>> neigh.fit(X, y) # doctest: +ELLIPSIS\n", | |
" | RadiusNeighborsRegressor(...)\n", | |
" | >>> print(neigh.predict([[1.5]]))\n", | |
" | [ 0.5]\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | NearestNeighbors\n", | |
" | KNeighborsRegressor\n", | |
" | KNeighborsClassifier\n", | |
" | RadiusNeighborsClassifier\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | See :ref:`Nearest Neighbors <neighbors>` in the online documentation\n", | |
" | for a discussion of the choice of ``algorithm`` and ``leaf_size``.\n", | |
" | \n", | |
" | http://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm\n", | |
" | \n", | |
" | Method resolution order:\n", | |
" | RadiusNeighborsRegressor\n", | |
" | sklearn.neighbors.base.NeighborsBase\n", | |
" | abc.NewBase\n", | |
" | sklearn.base.BaseEstimator\n", | |
" | sklearn.neighbors.base.RadiusNeighborsMixin\n", | |
" | sklearn.neighbors.base.SupervisedFloatMixin\n", | |
" | sklearn.base.RegressorMixin\n", | |
" | __builtin__.object\n", | |
" | \n", | |
" | Methods defined here:\n", | |
" | \n", | |
" | __init__(self, radius=1.0, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, **kwargs)\n", | |
" | \n", | |
" | predict(self, X)\n", | |
" | Predict the target for the provided data\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array or matrix, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | y : array of int, shape = [n_samples] or [n_samples, n_outputs]\n", | |
" | Target values\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data and other attributes defined here:\n", | |
" | \n", | |
" | __abstractmethods__ = frozenset([])\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __repr__(self)\n", | |
" | \n", | |
" | get_params(self, deep=True)\n", | |
" | Get parameters for this estimator.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | deep: boolean, optional\n", | |
" | If True, will return the parameters for this estimator and\n", | |
" | contained subobjects that are estimators.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | params : mapping of string to any\n", | |
" | Parameter names mapped to their values.\n", | |
" | \n", | |
" | set_params(self, **params)\n", | |
" | Set the parameters of this estimator.\n", | |
" | \n", | |
" | The method works on simple estimators as well as on nested objects\n", | |
" | (such as pipelines). The former have parameters of the form\n", | |
" | ``<component>__<parameter>`` so that it's possible to update each\n", | |
" | component of a nested object.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | self\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Data descriptors inherited from sklearn.base.BaseEstimator:\n", | |
" | \n", | |
" | __dict__\n", | |
" | dictionary for instance variables (if defined)\n", | |
" | \n", | |
" | __weakref__\n", | |
" | list of weak references to the object (if defined)\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.RadiusNeighborsMixin:\n", | |
" | \n", | |
" | radius_neighbors(self, X=None, radius=None, return_distance=True)\n", | |
" | Finds the neighbors within a given radius of a point or points.\n", | |
" | \n", | |
" | Return the indices and distances of each point from the dataset\n", | |
" | lying in a ball with size ``radius`` around the points of the query\n", | |
" | array. Points lying on the boundary are included in the results.\n", | |
" | \n", | |
" | The result points are *not* necessarily sorted by distance to their\n", | |
" | query point.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, (n_samples, n_features), optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Limiting distance of neighbors to return.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | return_distance : boolean, optional. Defaults to True.\n", | |
" | If False, distances will not be returned\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | dist : array, shape (n_samples,) of arrays\n", | |
" | Array representing the distances to each point, only present if\n", | |
" | return_distance=True. The distance values are computed according\n", | |
" | to the ``metric`` constructor parameter.\n", | |
" | \n", | |
" | ind : array, shape (n_samples,) of arrays\n", | |
" | An array of arrays of indices of the approximate nearest points\n", | |
" | from the population matrix that lie within a ball of size\n", | |
" | ``radius`` around the query points.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | In the following example, we construct a NeighborsClassifier\n", | |
" | class from an array representing our data set and ask who's\n", | |
" | the closest point to [1, 1, 1]:\n", | |
" | \n", | |
" | >>> import numpy as np\n", | |
" | >>> samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.6)\n", | |
" | >>> neigh.fit(samples) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> rng = neigh.radius_neighbors([1., 1., 1.])\n", | |
" | >>> print(np.asarray(rng[0][0])) # doctest: +ELLIPSIS\n", | |
" | [ 1.5 0.5]\n", | |
" | >>> print(np.asarray(rng[1][0])) # doctest: +ELLIPSIS\n", | |
" | [1 2]\n", | |
" | \n", | |
" | The first array returned contains the distances to all points which\n", | |
" | are closer than 1.6, while the second array returned contains their\n", | |
" | indices. In general, multiple points can be queried at the same time.\n", | |
" | \n", | |
" | Notes\n", | |
" | -----\n", | |
" | Because the number of neighbors of each point is not necessarily\n", | |
" | equal, the results for multiple query points cannot be fit in a\n", | |
" | standard data array.\n", | |
" | For efficiency, `radius_neighbors` returns arrays of objects, where\n", | |
" | each object is a 1D array of indices or distances.\n", | |
" | \n", | |
" | radius_neighbors_graph(self, X=None, radius=None, mode='connectivity')\n", | |
" | Computes the (weighted) graph of Neighbors for points in X\n", | |
" | \n", | |
" | Neighborhoods are restricted the points at a distance lower than\n", | |
" | radius.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = [n_samples, n_features], optional\n", | |
" | The query point or points.\n", | |
" | If not provided, neighbors of each indexed point are returned.\n", | |
" | In this case, the query point is not considered its own neighbor.\n", | |
" | \n", | |
" | radius : float\n", | |
" | Radius of neighborhoods.\n", | |
" | (default is the value passed to the constructor).\n", | |
" | \n", | |
" | mode : {'connectivity', 'distance'}, optional\n", | |
" | Type of returned matrix: 'connectivity' will return the\n", | |
" | connectivity matrix with ones and zeros, in 'distance' the\n", | |
" | edges are Euclidean distance between points.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n", | |
" | A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" | \n", | |
" | Examples\n", | |
" | --------\n", | |
" | >>> X = [[0], [3], [1]]\n", | |
" | >>> from sklearn.neighbors import NearestNeighbors\n", | |
" | >>> neigh = NearestNeighbors(radius=1.5)\n", | |
" | >>> neigh.fit(X) # doctest: +ELLIPSIS\n", | |
" | NearestNeighbors(algorithm='auto', leaf_size=30, ...)\n", | |
" | >>> A = neigh.radius_neighbors_graph(X)\n", | |
" | >>> A.toarray()\n", | |
" | array([[ 1., 0., 1.],\n", | |
" | [ 0., 1., 0.],\n", | |
" | [ 1., 0., 1.]])\n", | |
" | \n", | |
" | See also\n", | |
" | --------\n", | |
" | kneighbors_graph\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.neighbors.base.SupervisedFloatMixin:\n", | |
" | \n", | |
" | fit(self, X, y)\n", | |
" | Fit the model using X as training data and y as target values\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : {array-like, sparse matrix, BallTree, KDTree}\n", | |
" | Training data. If array or matrix, shape = [n_samples, n_features]\n", | |
" | \n", | |
" | y : {array-like, sparse matrix}\n", | |
" | Target values, array of float values, shape = [n_samples]\n", | |
" | or [n_samples, n_outputs]\n", | |
" | \n", | |
" | ----------------------------------------------------------------------\n", | |
" | Methods inherited from sklearn.base.RegressorMixin:\n", | |
" | \n", | |
" | score(self, X, y, sample_weight=None)\n", | |
" | Returns the coefficient of determination R^2 of the prediction.\n", | |
" | \n", | |
" | The coefficient R^2 is defined as (1 - u/v), where u is the regression\n", | |
" | sum of squares ((y_true - y_pred) ** 2).sum() and v is the residual\n", | |
" | sum of squares ((y_true - y_true.mean()) ** 2).sum().\n", | |
" | Best possible score is 1.0, lower values are worse.\n", | |
" | \n", | |
" | Parameters\n", | |
" | ----------\n", | |
" | X : array-like, shape = (n_samples, n_features)\n", | |
" | Test samples.\n", | |
" | \n", | |
" | y : array-like, shape = (n_samples) or (n_samples, n_outputs)\n", | |
" | True values for X.\n", | |
" | \n", | |
" | sample_weight : array-like, shape = [n_samples], optional\n", | |
" | Sample weights.\n", | |
" | \n", | |
" | Returns\n", | |
" | -------\n", | |
" | score : float\n", | |
" | R^2 of self.predict(X) wrt. y.\n", | |
"\n", | |
"FUNCTIONS\n", | |
" kneighbors_graph(X, n_neighbors, mode='connectivity', metric='minkowski', p=2, metric_params=None, include_self=None)\n", | |
" Computes the (weighted) graph of k-Neighbors for points in X\n", | |
" \n", | |
" Parameters\n", | |
" ----------\n", | |
" X : array-like or BallTree, shape = [n_samples, n_features]\n", | |
" Sample data, in the form of a numpy array or a precomputed\n", | |
" :class:`BallTree`.\n", | |
" \n", | |
" n_neighbors : int\n", | |
" Number of neighbors for each sample.\n", | |
" \n", | |
" mode : {'connectivity', 'distance'}, optional\n", | |
" Type of returned matrix: 'connectivity' will return the\n", | |
" connectivity matrix with ones and zeros, in 'distance' the\n", | |
" edges are Euclidean distance between points.\n", | |
" \n", | |
" metric : string, default 'minkowski'\n", | |
" The distance metric used to calculate the k-Neighbors for each sample\n", | |
" point. The DistanceMetric class gives a list of available metrics.\n", | |
" The default distance is 'euclidean' ('minkowski' metric with the p\n", | |
" param equal to 2.)\n", | |
" \n", | |
" include_self: bool, default backward-compatible.\n", | |
" Whether or not to mark each sample as the first nearest neighbor to\n", | |
" itself. If `None`, then True is used for mode='connectivity' and False\n", | |
" for mode='distance' as this will preserve backwards compatibilty. From\n", | |
" version 0.18, the default value will be False, irrespective of the\n", | |
" value of `mode`.\n", | |
" \n", | |
" p : int, default 2\n", | |
" Power parameter for the Minkowski metric. When p = 1, this is\n", | |
" equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" \n", | |
" metric_params: dict, optional\n", | |
" additional keyword arguments for the metric function.\n", | |
" \n", | |
" Returns\n", | |
" -------\n", | |
" A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n", | |
" A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" \n", | |
" Examples\n", | |
" --------\n", | |
" >>> X = [[0], [3], [1]]\n", | |
" >>> from sklearn.neighbors import kneighbors_graph\n", | |
" >>> A = kneighbors_graph(X, 2)\n", | |
" >>> A.toarray()\n", | |
" array([[ 1., 0., 1.],\n", | |
" [ 0., 1., 1.],\n", | |
" [ 1., 0., 1.]])\n", | |
" \n", | |
" See also\n", | |
" --------\n", | |
" radius_neighbors_graph\n", | |
" \n", | |
" radius_neighbors_graph(X, radius, mode='connectivity', metric='minkowski', p=2, metric_params=None, include_self=None)\n", | |
" Computes the (weighted) graph of Neighbors for points in X\n", | |
" \n", | |
" Neighborhoods are restricted the points at a distance lower than\n", | |
" radius.\n", | |
" \n", | |
" Parameters\n", | |
" ----------\n", | |
" X : array-like or BallTree, shape = [n_samples, n_features]\n", | |
" Sample data, in the form of a numpy array or a precomputed\n", | |
" :class:`BallTree`.\n", | |
" \n", | |
" radius : float\n", | |
" Radius of neighborhoods.\n", | |
" \n", | |
" mode : {'connectivity', 'distance'}, optional\n", | |
" Type of returned matrix: 'connectivity' will return the\n", | |
" connectivity matrix with ones and zeros, in 'distance' the\n", | |
" edges are Euclidean distance between points.\n", | |
" \n", | |
" metric : string, default 'minkowski'\n", | |
" The distance metric used to calculate the neighbors within a\n", | |
" given radius for each sample point. The DistanceMetric class\n", | |
" gives a list of available metrics. The default distance is\n", | |
" 'euclidean' ('minkowski' metric with the param equal to 2.)\n", | |
" \n", | |
" include_self: bool, default None\n", | |
" Whether or not to mark each sample as the first nearest neighbor to\n", | |
" itself. If `None`, then True is used for mode='connectivity' and False\n", | |
" for mode='distance' as this will preserve backwards compatibilty. From\n", | |
" version 0.18, the default value will be False, irrespective of the\n", | |
" value of `mode`.\n", | |
" \n", | |
" p : int, default 2\n", | |
" Power parameter for the Minkowski metric. When p = 1, this is\n", | |
" equivalent to using manhattan_distance (l1), and euclidean_distance\n", | |
" (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n", | |
" \n", | |
" metric_params: dict, optional\n", | |
" additional keyword arguments for the metric function.\n", | |
" \n", | |
" Returns\n", | |
" -------\n", | |
" A : sparse matrix in CSR format, shape = [n_samples, n_samples]\n", | |
" A[i, j] is assigned the weight of edge that connects i to j.\n", | |
" \n", | |
" Examples\n", | |
" --------\n", | |
" >>> X = [[0], [3], [1]]\n", | |
" >>> from sklearn.neighbors import radius_neighbors_graph\n", | |
" >>> A = radius_neighbors_graph(X, 1.5)\n", | |
" >>> A.toarray()\n", | |
" array([[ 1., 0., 1.],\n", | |
" [ 0., 1., 0.],\n", | |
" [ 1., 0., 1.]])\n", | |
" \n", | |
" See also\n", | |
" --------\n", | |
" kneighbors_graph\n", | |
"\n", | |
"DATA\n", | |
" __all__ = ['BallTree', 'DistanceMetric', 'KDTree', 'KNeighborsClassifi...\n", | |
" __warningregistry__ = {('numpy.dtype size changed, may indicate binary...\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# About KNN model in Scikit Learn\n", | |
"help(neighbors)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n", | |
" metric_params=None, n_neighbors=5, p=2, weights='uniform')" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# create the model\n", | |
"knn_model = neighbors.KNeighborsClassifier(n_neighbors=5)\n", | |
"\n", | |
"# fit the model\n", | |
"knn_model.fit(X_train,y_train)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Predicted Probabilities \n", | |
"------------------------\n", | |
"['setosa' 'versicolor' 'virginica']\n", | |
"[[ 0. 1. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 0.6 0.4]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 0. 0.2 0.8]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 0.8 0.2]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 0.2 0.8]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 0. 0. 1. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 0.4 0.6]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 0. 0.2 0.8]\n", | |
" [ 0. 0.8 0.2]\n", | |
" [ 0. 1. 0. ]\n", | |
" [ 1. 0. 0. ]\n", | |
" [ 1. 0. 0. ]]\n", | |
"\n", | |
"Prediction / Actual\n", | |
"-------------------\n", | |
"versicolor / versicolor\n", | |
"setosa / setosa\n", | |
"virginica / virginica\n", | |
"versicolor / versicolor\n", | |
"versicolor / versicolor\n", | |
"setosa / setosa\n", | |
"versicolor / versicolor\n", | |
"virginica / virginica\n", | |
"versicolor / versicolor\n", | |
"versicolor / versicolor\n", | |
"virginica / virginica\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"versicolor / versicolor\n", | |
"virginica / virginica\n", | |
"versicolor / versicolor\n", | |
"versicolor / versicolor\n", | |
"virginica / virginica\n", | |
"setosa / setosa\n", | |
"virginica / virginica\n", | |
"setosa / setosa\n", | |
"virginica / virginica\n", | |
"virginica / virginica\n", | |
"virginica / virginica\n", | |
"virginica / virginica\n", | |
"virginica / virginica\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"versicolor / versicolor\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"virginica / virginica\n", | |
"versicolor / versicolor\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n", | |
"virginica / virginica\n", | |
"versicolor / versicolor\n", | |
"versicolor / versicolor\n", | |
"setosa / setosa\n", | |
"setosa / setosa\n" | |
] | |
} | |
], | |
"source": [ | |
"# model predictions\n", | |
"predictions = knn_model.predict(X_test)\n", | |
"prob_predictions = knn_model.predict_proba(X_test)\n", | |
"predictions = [iris.target_names[i] for i in predictions]\n", | |
"print \"Predicted Probabilities \"\n", | |
"print \"------------------------\"\n", | |
"print iris.target_names\n", | |
"print prob_predictions\n", | |
"print \n", | |
"# as opposed to actuals\n", | |
"y_test\n", | |
"actuals = [iris.target_names[i] for i in y_test]\n", | |
"print \"Prediction / Actual\"\n", | |
"print \"-------------------\"\n", | |
"for i in range(len(y_test)):\n", | |
" print predictions[i] + \" / \" + actuals[i]" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment