decisionstats/numpy and pandas 1

## numpy and pandas 1
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://s3.amazonaws.com/quandl-static-content/Documents/Quandl+-+Pandas,+SciPy,+NumPy+Cheat+Sheet.pdf\n",
    "Quandl Cheat Sheet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy as sp\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.arange(30) #arange prints numbers from 0 to number specified"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a1=np.arange(30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "435"
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(a1) #sum of array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3,  4],\n",
       "       [ 5,  6,  7,  8,  9],\n",
       "       [10, 11, 12, 13, 14],\n",
       "       [15, 16, 17, 18, 19],\n",
       "       [20, 21, 22, 23, 24],\n",
       "       [25, 26, 27, 28, 29]])"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = a1.reshape(6,5) #reshapes array in rows,columns format\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 0,  1,  2,  3,  4],\n",
       "        [ 5,  6,  7,  8,  9]],\n",
       "\n",
       "       [[10, 11, 12, 13, 14],\n",
       "        [15, 16, 17, 18, 19]],\n",
       "\n",
       "       [[20, 21, 22, 23, 24],\n",
       "        [25, 26, 27, 28, 29]]])"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a1.reshape(3,2,5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 0, 10, 20],\n",
       "        [ 5, 15, 25]],\n",
       "\n",
       "       [[ 1, 11, 21],\n",
       "        [ 6, 16, 26]],\n",
       "\n",
       "       [[ 2, 12, 22],\n",
       "        [ 7, 17, 27]],\n",
       "\n",
       "       [[ 3, 13, 23],\n",
       "        [ 8, 18, 28]],\n",
       "\n",
       "       [[ 4, 14, 24],\n",
       "        [ 9, 19, 29]]])"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = a1.reshape(3,2,5).swapaxes(0,2) #swaping axes\n",
    "b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 0, 10, 20],\n",
       "        [ 1, 11, 21],\n",
       "        [ 2, 12, 22],\n",
       "        [ 3, 13, 23],\n",
       "        [ 4, 14, 24]],\n",
       "\n",
       "       [[ 5, 15, 25],\n",
       "        [ 6, 16, 26],\n",
       "        [ 7, 17, 27],\n",
       "        [ 8, 18, 28],\n",
       "        [ 9, 19, 29]]])"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b.swapaxes(0,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14.5"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8.6554414483991895"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24\n",
      " 25 26 27 28 29]\n",
      "[0 1 2 3 4 5 6 7 8 9]\n"
     ]
    }
   ],
   "source": [
    "f=np.arange(30)\n",
    "g=np.arange(10)\n",
    "print(f)\n",
    "print(g)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html \n",
    "convolution is a mathematical operation on two functions (f and g); it produces a third function, that is typically viewed as a modified version of one of the original functions, giving the integral of the pointwise multiplication of the two functions as a function of the amount that one of the original functions is translated. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[   0    0    1    4   10   20   35   56   84  120  165  210  255  300  345\n",
      "  390  435  480  525  570  615  660  705  750  795  840  885  930  975 1020\n",
      " 1065 1080 1064 1016  935  820  670  484  261]\n"
     ]
    }
   ],
   "source": [
    "c=np.convolve(f,g)\n",
    "print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(39,)"
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html For 2-D arrays it is equivalent to matrix multiplication, and for 1-D arrays to inner product of vectors (without complex conjugation). For N dimensions it is a sum product over the last axis of a and the second-to-last of b:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "        17, 18, 19],\n",
       "       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n",
       "        37, 38, 39],\n",
       "       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,\n",
       "        57, 58, 59],\n",
       "       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,\n",
       "        77, 78, 79],\n",
       "       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,\n",
       "        97, 98, 99]])"
      ]
     },
     "execution_count": 179,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.arange(100).reshape((5,20))\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[  0   1   2   3   4   5   6   7   8   9]\n",
      " [ 10  11  12  13  14  15  16  17  18  19]\n",
      " [ 20  21  22  23  24  25  26  27  28  29]\n",
      " [ 30  31  32  33  34  35  36  37  38  39]\n",
      " [ 40  41  42  43  44  45  46  47  48  49]\n",
      " [ 50  51  52  53  54  55  56  57  58  59]\n",
      " [ 60  61  62  63  64  65  66  67  68  69]\n",
      " [ 70  71  72  73  74  75  76  77  78  79]\n",
      " [ 80  81  82  83  84  85  86  87  88  89]\n",
      " [ 90  91  92  93  94  95  96  97  98  99]\n",
      " [100 101 102 103 104 105 106 107 108 109]\n",
      " [110 111 112 113 114 115 116 117 118 119]\n",
      " [120 121 122 123 124 125 126 127 128 129]\n",
      " [130 131 132 133 134 135 136 137 138 139]\n",
      " [140 141 142 143 144 145 146 147 148 149]\n",
      " [150 151 152 153 154 155 156 157 158 159]\n",
      " [160 161 162 163 164 165 166 167 168 169]\n",
      " [170 171 172 173 174 175 176 177 178 179]\n",
      " [180 181 182 183 184 185 186 187 188 189]\n",
      " [190 191 192 193 194 195 196 197 198 199]]\n"
     ]
    }
   ],
   "source": [
    "b = np.arange(200).reshape(20,10)\n",
    "print(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 24700,  24890,  25080,  25270,  25460,  25650,  25840,  26030,\n",
       "         26220,  26410],\n",
       "       [ 62700,  63290,  63880,  64470,  65060,  65650,  66240,  66830,\n",
       "         67420,  68010],\n",
       "       [100700, 101690, 102680, 103670, 104660, 105650, 106640, 107630,\n",
       "        108620, 109610],\n",
       "       [138700, 140090, 141480, 142870, 144260, 145650, 147040, 148430,\n",
       "        149820, 151210],\n",
       "       [176700, 178490, 180280, 182070, 183860, 185650, 187440, 189230,\n",
       "        191020, 192810]])"
      ]
     },
     "execution_count": 183,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.dot(a, b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PANDAS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "diamonds =pd.read_csv(\"https://vincentarelbundock.github.io/Rdatasets/csv/ggplot2/diamonds.csv \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(diamonds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 0.23, 'Ideal', ..., 3.95, 3.98, 2.43],\n",
       "       [2, 0.21, 'Premium', ..., 3.89, 3.84, 2.31],\n",
       "       [3, 0.23, 'Good', ..., 4.05, 4.07, 2.31],\n",
       "       ..., \n",
       "       [53938, 0.7, 'Very Good', ..., 5.66, 5.68, 3.56],\n",
       "       [53939, 0.86, 'Premium', ..., 6.15, 6.12, 3.74],\n",
       "       [53940, 0.75, 'Ideal', ..., 5.83, 5.87, 3.64]], dtype=object)"
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=53940, step=1)"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Unnamed: 0', 'carat', 'cut', 'color', 'clarity', 'depth', 'table',\n",
       "       'price', 'x', 'y', 'z'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',\n",
       "       'z'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds.columns[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "caratseries=pd.Series(diamonds[\"carat\"],index=diamonds.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(caratseries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        0.23\n",
       "1        0.21\n",
       "2        0.23\n",
       "3        0.29\n",
       "4        0.31\n",
       "5        0.24\n",
       "6        0.24\n",
       "7        0.26\n",
       "8        0.22\n",
       "9        0.23\n",
       "10       0.30\n",
       "11       0.23\n",
       "12       0.22\n",
       "13       0.31\n",
       "14       0.20\n",
       "15       0.32\n",
       "16       0.30\n",
       "17       0.30\n",
       "18       0.30\n",
       "19       0.30\n",
       "20       0.30\n",
       "21       0.23\n",
       "22       0.23\n",
       "23       0.31\n",
       "24       0.31\n",
       "25       0.23\n",
       "26       0.24\n",
       "27       0.30\n",
       "28       0.23\n",
       "29       0.23\n",
       "         ... \n",
       "53910    0.70\n",
       "53911    0.57\n",
       "53912    0.61\n",
       "53913    0.80\n",
       "53914    0.84\n",
       "53915    0.77\n",
       "53916    0.74\n",
       "53917    0.90\n",
       "53918    0.76\n",
       "53919    0.76\n",
       "53920    0.70\n",
       "53921    0.70\n",
       "53922    0.70\n",
       "53923    0.73\n",
       "53924    0.73\n",
       "53925    0.79\n",
       "53926    0.71\n",
       "53927    0.79\n",
       "53928    0.79\n",
       "53929    0.71\n",
       "53930    0.71\n",
       "53931    0.71\n",
       "53932    0.70\n",
       "53933    0.70\n",
       "53934    0.72\n",
       "53935    0.72\n",
       "53936    0.72\n",
       "53937    0.70\n",
       "53938    0.86\n",
       "53939    0.75\n",
       "Name: carat, dtype: float64"
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caratseries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#p=pd.Panel(matrix, items=0, major_axis=1, minor_axis=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        0.23\n",
       "1        0.21\n",
       "2        0.23\n",
       "3        0.29\n",
       "4        0.31\n",
       "5        0.24\n",
       "6        0.24\n",
       "7        0.26\n",
       "8        0.22\n",
       "9        0.23\n",
       "10       0.30\n",
       "11       0.23\n",
       "12       0.22\n",
       "13       0.31\n",
       "14       0.20\n",
       "15       0.32\n",
       "16       0.30\n",
       "17       0.30\n",
       "18       0.30\n",
       "19       0.30\n",
       "20       0.30\n",
       "21       0.23\n",
       "22       0.23\n",
       "23       0.31\n",
       "24       0.31\n",
       "25       0.23\n",
       "26       0.24\n",
       "27       0.30\n",
       "28       0.23\n",
       "29       0.23\n",
       "         ... \n",
       "53910    0.70\n",
       "53911    0.57\n",
       "53912    0.61\n",
       "53913    0.80\n",
       "53914    0.84\n",
       "53915    0.77\n",
       "53916    0.74\n",
       "53917    0.90\n",
       "53918    0.76\n",
       "53919    0.76\n",
       "53920    0.70\n",
       "53921    0.70\n",
       "53922    0.70\n",
       "53923    0.73\n",
       "53924    0.73\n",
       "53925    0.79\n",
       "53926    0.71\n",
       "53927    0.79\n",
       "53928    0.79\n",
       "53929    0.71\n",
       "53930    0.71\n",
       "53931    0.71\n",
       "53932    0.70\n",
       "53933    0.70\n",
       "53934    0.72\n",
       "53935    0.72\n",
       "53936    0.72\n",
       "53937    0.70\n",
       "53938    0.86\n",
       "53939    0.75\n",
       "Name: carat, dtype: float64"
      ]
     },
     "execution_count": 199,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds['carat']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "diamonds2=diamonds.as_matrix(columns=diamonds.columns[1:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(diamonds2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.23, 'Ideal', 'E', ..., 3.95, 3.98, 2.43],\n",
       "       [0.21, 'Premium', 'E', ..., 3.89, 3.84, 2.31],\n",
       "       [0.23, 'Good', 'E', ..., 4.05, 4.07, 2.31],\n",
       "       ..., \n",
       "       [0.7, 'Very Good', 'D', ..., 5.66, 5.68, 3.56],\n",
       "       [0.86, 'Premium', 'H', ..., 6.15, 6.12, 3.74],\n",
       "       [0.75, 'Ideal', 'D', ..., 5.83, 5.87, 3.64]], dtype=object)"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(53940, 10)"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "53940"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(diamonds2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "diamonds3=pd.DataFrame(data=diamonds2,    # values\n",
    "             index=diamonds.index,    # 1st column as index\n",
    "              columns=diamonds.columns[1:])  # 1st row as the column names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 53940 entries, 0 to 53939\n",
      "Data columns (total 10 columns):\n",
      "carat      53940 non-null object\n",
      "cut        53940 non-null object\n",
      "color      53940 non-null object\n",
      "clarity    53940 non-null object\n",
      "depth      53940 non-null object\n",
      "table      53940 non-null object\n",
      "price      53940 non-null object\n",
      "x          53940 non-null object\n",
      "y          53940 non-null object\n",
      "z          53940 non-null object\n",
      "dtypes: object(10)\n",
      "memory usage: 4.1+ MB\n"
     ]
    }
   ],
   "source": [
    "diamonds3.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "<class 'numpy.ndarray'>\n"
     ]
    }
   ],
   "source": [
    "a=diamonds.iloc[:,1:]\n",
    "b=diamonds.iloc[:,1:].values\n",
    "\n",
    "print(type(diamonds))\n",
    "print(type(a))\n",
    "print(type(b))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>carat</th>\n",
       "      <th>cut</th>\n",
       "      <th>color</th>\n",
       "      <th>clarity</th>\n",
       "      <th>depth</th>\n",
       "      <th>table</th>\n",
       "      <th>price</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.23</td>\n",
       "      <td>Ideal</td>\n",
       "      <td>E</td>\n",
       "      <td>SI2</td>\n",
       "      <td>61.5</td>\n",
       "      <td>55.0</td>\n",
       "      <td>326</td>\n",
       "      <td>3.95</td>\n",
       "      <td>3.98</td>\n",
       "      <td>2.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.21</td>\n",
       "      <td>Premium</td>\n",
       "      <td>E</td>\n",
       "      <td>SI1</td>\n",
       "      <td>59.8</td>\n",
       "      <td>61.0</td>\n",
       "      <td>326</td>\n",
       "      <td>3.89</td>\n",
       "      <td>3.84</td>\n",
       "      <td>2.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.23</td>\n",
       "      <td>Good</td>\n",
       "      <td>E</td>\n",
       "      <td>VS1</td>\n",
       "      <td>56.9</td>\n",
       "      <td>65.0</td>\n",
       "      <td>327</td>\n",
       "      <td>4.05</td>\n",
       "      <td>4.07</td>\n",
       "      <td>2.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.29</td>\n",
       "      <td>Premium</td>\n",
       "      <td>I</td>\n",
       "      <td>VS2</td>\n",
       "      <td>62.4</td>\n",
       "      <td>58.0</td>\n",
       "      <td>334</td>\n",
       "      <td>4.20</td>\n",
       "      <td>4.23</td>\n",
       "      <td>2.63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.31</td>\n",
       "      <td>Good</td>\n",
       "      <td>J</td>\n",
       "      <td>SI2</td>\n",
       "      <td>63.3</td>\n",
       "      <td>58.0</td>\n",
       "      <td>335</td>\n",
       "      <td>4.34</td>\n",
       "      <td>4.35</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   carat      cut color clarity  depth  table  price     x     y     z\n",
       "0   0.23    Ideal     E     SI2   61.5   55.0    326  3.95  3.98  2.43\n",
       "1   0.21  Premium     E     SI1   59.8   61.0    326  3.89  3.84  2.31\n",
       "2   0.23     Good     E     VS1   56.9   65.0    327  4.05  4.07  2.31\n",
       "3   0.29  Premium     I     VS2   62.4   58.0    334  4.20  4.23  2.63\n",
       "4   0.31     Good     J     SI2   63.3   58.0    335  4.34  4.35  2.75"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=53940, step=1)"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',\n",
       "       'z'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.23, 'Ideal', 'E', ..., 3.95, 3.98, 2.43],\n",
       "       [0.21, 'Premium', 'E', ..., 3.89, 3.84, 2.31],\n",
       "       [0.23, 'Good', 'E', ..., 4.05, 4.07, 2.31],\n",
       "       ..., \n",
       "       [0.7, 'Very Good', 'D', ..., 5.66, 5.68, 3.56],\n",
       "       [0.86, 'Premium', 'H', ..., 6.15, 6.12, 3.74],\n",
       "       [0.75, 'Ideal', 'D', ..., 5.83, 5.87, 3.64]], dtype=object)"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.array(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "darray=np.array(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "diamonds4=pd.DataFrame(data=darray ,    # values\n",
    "             index=a.index,    # 1st column as index\n",
    "              columns=a.columns)  # 1st row as the column names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>carat</th>\n",
       "      <th>cut</th>\n",
       "      <th>color</th>\n",
       "      <th>clarity</th>\n",
       "      <th>depth</th>\n",
       "      <th>table</th>\n",
       "      <th>price</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.23</td>\n",
       "      <td>Ideal</td>\n",
       "      <td>E</td>\n",
       "      <td>SI2</td>\n",
       "      <td>61.5</td>\n",
       "      <td>55</td>\n",
       "      <td>326</td>\n",
       "      <td>3.95</td>\n",
       "      <td>3.98</td>\n",
       "      <td>2.43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.21</td>\n",
       "      <td>Premium</td>\n",
       "      <td>E</td>\n",
       "      <td>SI1</td>\n",
       "      <td>59.8</td>\n",
       "      <td>61</td>\n",
       "      <td>326</td>\n",
       "      <td>3.89</td>\n",
       "      <td>3.84</td>\n",
       "      <td>2.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.23</td>\n",
       "      <td>Good</td>\n",
       "      <td>E</td>\n",
       "      <td>VS1</td>\n",
       "      <td>56.9</td>\n",
       "      <td>65</td>\n",
       "      <td>327</td>\n",
       "      <td>4.05</td>\n",
       "      <td>4.07</td>\n",
       "      <td>2.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.29</td>\n",
       "      <td>Premium</td>\n",
       "      <td>I</td>\n",
       "      <td>VS2</td>\n",
       "      <td>62.4</td>\n",
       "      <td>58</td>\n",
       "      <td>334</td>\n",
       "      <td>4.2</td>\n",
       "      <td>4.23</td>\n",
       "      <td>2.63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.31</td>\n",
       "      <td>Good</td>\n",
       "      <td>J</td>\n",
       "      <td>SI2</td>\n",
       "      <td>63.3</td>\n",
       "      <td>58</td>\n",
       "      <td>335</td>\n",
       "      <td>4.34</td>\n",
       "      <td>4.35</td>\n",
       "      <td>2.75</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  carat      cut color clarity depth table price     x     y     z\n",
       "0  0.23    Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43\n",
       "1  0.21  Premium     E     SI1  59.8    61   326  3.89  3.84  2.31\n",
       "2  0.23     Good     E     VS1  56.9    65   327  4.05  4.07  2.31\n",
       "3  0.29  Premium     I     VS2  62.4    58   334   4.2  4.23  2.63\n",
       "4  0.31     Good     J     SI2  63.3    58   335  4.34  4.35  2.75"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "diamonds4.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    " from sklearn import datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sklearn.datasets.base.Bunch"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(iris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(iris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['__class__',\n",
       " '__contains__',\n",
       " '__delattr__',\n",
       " '__delitem__',\n",
       " '__dict__',\n",
       " '__dir__',\n",
       " '__doc__',\n",
       " '__eq__',\n",
       " '__format__',\n",
       " '__ge__',\n",
       " '__getattr__',\n",
       " '__getattribute__',\n",
       " '__getitem__',\n",
       " '__gt__',\n",
       " '__hash__',\n",
       " '__init__',\n",
       " '__iter__',\n",
       " '__le__',\n",
       " '__len__',\n",
       " '__lt__',\n",
       " '__module__',\n",
       " '__ne__',\n",
       " '__new__',\n",
       " '__reduce__',\n",
       " '__reduce_ex__',\n",
       " '__repr__',\n",
       " '__setattr__',\n",
       " '__setitem__',\n",
       " '__setstate__',\n",
       " '__sizeof__',\n",
       " '__str__',\n",
       " '__subclasshook__',\n",
       " '__weakref__',\n",
       " 'clear',\n",
       " 'copy',\n",
       " 'fromkeys',\n",
       " 'get',\n",
       " 'items',\n",
       " 'keys',\n",
       " 'pop',\n",
       " 'popitem',\n",
       " 'setdefault',\n",
       " 'update',\n",
       " 'values']"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir(iris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_values(['Iris Plants Database\\n\\nNotes\\n-----\\nData Set Characteristics:\\n    :Number of Instances: 150 (50 in each of three classes)\\n    :Number of Attributes: 4 numeric, predictive attributes and the class\\n    :Attribute Information:\\n        - sepal length in cm\\n        - sepal width in cm\\n        - petal length in cm\\n        - petal width in cm\\n        - class:\\n                - Iris-Setosa\\n                - Iris-Versicolour\\n                - Iris-Virginica\\n    :Summary Statistics:\\n\\n    ============== ==== ==== ======= ===== ====================\\n                    Min  Max   Mean    SD   Class Correlation\\n    ============== ==== ==== ======= ===== ====================\\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\\n    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)\\n    ============== ==== ==== ======= ===== ====================\\n\\n    :Missing Attribute Values: None\\n    :Class Distribution: 33.3% for each of 3 classes.\\n    :Creator: R.A. Fisher\\n    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\\n    :Date: July, 1988\\n\\nThis is a copy of UCI ML iris datasets.\\nhttp://archive.ics.uci.edu/ml/datasets/Iris\\n\\nThe famous Iris database, first used by Sir R.A Fisher\\n\\nThis is perhaps the best known database to be found in the\\npattern recognition literature.  Fisher\\'s paper is a classic in the field and\\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\\ndata set contains 3 classes of 50 instances each, where each class refers to a\\ntype of iris plant.  One class is linearly separable from the other 2; the\\nlatter are NOT linearly separable from each other.\\n\\nReferences\\n----------\\n   - Fisher,R.A. \"The use of multiple measurements in taxonomic problems\"\\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\\n     Mathematical Statistics\" (John Wiley, NY, 1950).\\n   - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.\\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\\n   - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\\n     Structure and Classification Rule for Recognition in Partially Exposed\\n     Environments\".  IEEE Transactions on Pattern Analysis and Machine\\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\\n   - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\\n     on Information Theory, May 1972, 431-433.\\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\\n     conceptual clustering system finds 3 classes in the data.\\n   - Many, many more ...\\n', array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'], array(['setosa', 'versicolor', 'virginica'], \n",
       "      dtype='<U10'), array([[ 5.1,  3.5,  1.4,  0.2],\n",
       "       [ 4.9,  3. ,  1.4,  0.2],\n",
       "       [ 4.7,  3.2,  1.3,  0.2],\n",
       "       [ 4.6,  3.1,  1.5,  0.2],\n",
       "       [ 5. ,  3.6,  1.4,  0.2],\n",
       "       [ 5.4,  3.9,  1.7,  0.4],\n",
       "       [ 4.6,  3.4,  1.4,  0.3],\n",
       "       [ 5. ,  3.4,  1.5,  0.2],\n",
       "       [ 4.4,  2.9,  1.4,  0.2],\n",
       "       [ 4.9,  3.1,  1.5,  0.1],\n",
       "       [ 5.4,  3.7,  1.5,  0.2],\n",
       "       [ 4.8,  3.4,  1.6,  0.2],\n",
       "       [ 4.8,  3. ,  1.4,  0.1],\n",
       "       [ 4.3,  3. ,  1.1,  0.1],\n",
       "       [ 5.8,  4. ,  1.2,  0.2],\n",
       "       [ 5.7,  4.4,  1.5,  0.4],\n",
       "       [ 5.4,  3.9,  1.3,  0.4],\n",
       "       [ 5.1,  3.5,  1.4,  0.3],\n",
       "       [ 5.7,  3.8,  1.7,  0.3],\n",
       "       [ 5.1,  3.8,  1.5,  0.3],\n",
       "       [ 5.4,  3.4,  1.7,  0.2],\n",
       "       [ 5.1,  3.7,  1.5,  0.4],\n",
       "       [ 4.6,  3.6,  1. ,  0.2],\n",
       "       [ 5.1,  3.3,  1.7,  0.5],\n",
       "       [ 4.8,  3.4,  1.9,  0.2],\n",
       "       [ 5. ,  3. ,  1.6,  0.2],\n",
       "       [ 5. ,  3.4,  1.6,  0.4],\n",
       "       [ 5.2,  3.5,  1.5,  0.2],\n",
       "       [ 5.2,  3.4,  1.4,  0.2],\n",
       "       [ 4.7,  3.2,  1.6,  0.2],\n",
       "       [ 4.8,  3.1,  1.6,  0.2],\n",
       "       [ 5.4,  3.4,  1.5,  0.4],\n",
       "       [ 5.2,  4.1,  1.5,  0.1],\n",
       "       [ 5.5,  4.2,  1.4,  0.2],\n",
       "       [ 4.9,  3.1,  1.5,  0.1],\n",
       "       [ 5. ,  3.2,  1.2,  0.2],\n",
       "       [ 5.5,  3.5,  1.3,  0.2],\n",
       "       [ 4.9,  3.1,  1.5,  0.1],\n",
       "       [ 4.4,  3. ,  1.3,  0.2],\n",
       "       [ 5.1,  3.4,  1.5,  0.2],\n",
       "       [ 5. ,  3.5,  1.3,  0.3],\n",
       "       [ 4.5,  2.3,  1.3,  0.3],\n",
       "       [ 4.4,  3.2,  1.3,  0.2],\n",
       "       [ 5. ,  3.5,  1.6,  0.6],\n",
       "       [ 5.1,  3.8,  1.9,  0.4],\n",
       "       [ 4.8,  3. ,  1.4,  0.3],\n",
       "       [ 5.1,  3.8,  1.6,  0.2],\n",
       "       [ 4.6,  3.2,  1.4,  0.2],\n",
       "       [ 5.3,  3.7,  1.5,  0.2],\n",
       "       [ 5. ,  3.3,  1.4,  0.2],\n",
       "       [ 7. ,  3.2,  4.7,  1.4],\n",
       "       [ 6.4,  3.2,  4.5,  1.5],\n",
       "       [ 6.9,  3.1,  4.9,  1.5],\n",
       "       [ 5.5,  2.3,  4. ,  1.3],\n",
       "       [ 6.5,  2.8,  4.6,  1.5],\n",
       "       [ 5.7,  2.8,  4.5,  1.3],\n",
       "       [ 6.3,  3.3,  4.7,  1.6],\n",
       "       [ 4.9,  2.4,  3.3,  1. ],\n",
       "       [ 6.6,  2.9,  4.6,  1.3],\n",
       "       [ 5.2,  2.7,  3.9,  1.4],\n",
       "       [ 5. ,  2. ,  3.5,  1. ],\n",
       "       [ 5.9,  3. ,  4.2,  1.5],\n",
       "       [ 6. ,  2.2,  4. ,  1. ],\n",
       "       [ 6.1,  2.9,  4.7,  1.4],\n",
       "       [ 5.6,  2.9,  3.6,  1.3],\n",
       "       [ 6.7,  3.1,  4.4,  1.4],\n",
       "       [ 5.6,  3. ,  4.5,  1.5],\n",
       "       [ 5.8,  2.7,  4.1,  1. ],\n",
       "       [ 6.2,  2.2,  4.5,  1.5],\n",
       "       [ 5.6,  2.5,  3.9,  1.1],\n",
       "       [ 5.9,  3.2,  4.8,  1.8],\n",
       "       [ 6.1,  2.8,  4. ,  1.3],\n",
       "       [ 6.3,  2.5,  4.9,  1.5],\n",
       "       [ 6.1,  2.8,  4.7,  1.2],\n",
       "       [ 6.4,  2.9,  4.3,  1.3],\n",
       "       [ 6.6,  3. ,  4.4,  1.4],\n",
       "       [ 6.8,  2.8,  4.8,  1.4],\n",
       "       [ 6.7,  3. ,  5. ,  1.7],\n",
       "       [ 6. ,  2.9,  4.5,  1.5],\n",
       "       [ 5.7,  2.6,  3.5,  1. ],\n",
       "       [ 5.5,  2.4,  3.8,  1.1],\n",
       "       [ 5.5,  2.4,  3.7,  1. ],\n",
       "       [ 5.8,  2.7,  3.9,  1.2],\n",
       "       [ 6. ,  2.7,  5.1,  1.6],\n",
       "       [ 5.4,  3. ,  4.5,  1.5],\n",
       "       [ 6. ,  3.4,  4.5,  1.6],\n",
       "       [ 6.7,  3.1,  4.7,  1.5],\n",
       "       [ 6.3,  2.3,  4.4,  1.3],\n",
       "       [ 5.6,  3. ,  4.1,  1.3],\n",
       "       [ 5.5,  2.5,  4. ,  1.3],\n",
       "       [ 5.5,  2.6,  4.4,  1.2],\n",
       "       [ 6.1,  3. ,  4.6,  1.4],\n",
       "       [ 5.8,  2.6,  4. ,  1.2],\n",
       "       [ 5. ,  2.3,  3.3,  1. ],\n",
       "       [ 5.6,  2.7,  4.2,  1.3],\n",
       "       [ 5.7,  3. ,  4.2,  1.2],\n",
       "       [ 5.7,  2.9,  4.2,  1.3],\n",
       "       [ 6.2,  2.9,  4.3,  1.3],\n",
       "       [ 5.1,  2.5,  3. ,  1.1],\n",
       "       [ 5.7,  2.8,  4.1,  1.3],\n",
       "       [ 6.3,  3.3,  6. ,  2.5],\n",
       "       [ 5.8,  2.7,  5.1,  1.9],\n",
       "       [ 7.1,  3. ,  5.9,  2.1],\n",
       "       [ 6.3,  2.9,  5.6,  1.8],\n",
       "       [ 6.5,  3. ,  5.8,  2.2],\n",
       "       [ 7.6,  3. ,  6.6,  2.1],\n",
       "       [ 4.9,  2.5,  4.5,  1.7],\n",
       "       [ 7.3,  2.9,  6.3,  1.8],\n",
       "       [ 6.7,  2.5,  5.8,  1.8],\n",
       "       [ 7.2,  3.6,  6.1,  2.5],\n",
       "       [ 6.5,  3.2,  5.1,  2. ],\n",
       "       [ 6.4,  2.7,  5.3,  1.9],\n",
       "       [ 6.8,  3. ,  5.5,  2.1],\n",
       "       [ 5.7,  2.5,  5. ,  2. ],\n",
       "       [ 5.8,  2.8,  5.1,  2.4],\n",
       "       [ 6.4,  3.2,  5.3,  2.3],\n",
       "       [ 6.5,  3. ,  5.5,  1.8],\n",
       "       [ 7.7,  3.8,  6.7,  2.2],\n",
       "       [ 7.7,  2.6,  6.9,  2.3],\n",
       "       [ 6. ,  2.2,  5. ,  1.5],\n",
       "       [ 6.9,  3.2,  5.7,  2.3],\n",
       "       [ 5.6,  2.8,  4.9,  2. ],\n",
       "       [ 7.7,  2.8,  6.7,  2. ],\n",
       "       [ 6.3,  2.7,  4.9,  1.8],\n",
       "       [ 6.7,  3.3,  5.7,  2.1],\n",
       "       [ 7.2,  3.2,  6. ,  1.8],\n",
       "       [ 6.2,  2.8,  4.8,  1.8],\n",
       "       [ 6.1,  3. ,  4.9,  1.8],\n",
       "       [ 6.4,  2.8,  5.6,  2.1],\n",
       "       [ 7.2,  3. ,  5.8,  1.6],\n",
       "       [ 7.4,  2.8,  6.1,  1.9],\n",
       "       [ 7.9,  3.8,  6.4,  2. ],\n",
       "       [ 6.4,  2.8,  5.6,  2.2],\n",
       "       [ 6.3,  2.8,  5.1,  1.5],\n",
       "       [ 6.1,  2.6,  5.6,  1.4],\n",
       "       [ 7.7,  3. ,  6.1,  2.3],\n",
       "       [ 6.3,  3.4,  5.6,  2.4],\n",
       "       [ 6.4,  3.1,  5.5,  1.8],\n",
       "       [ 6. ,  3. ,  4.8,  1.8],\n",
       "       [ 6.9,  3.1,  5.4,  2.1],\n",
       "       [ 6.7,  3.1,  5.6,  2.4],\n",
       "       [ 6.9,  3.1,  5.1,  2.3],\n",
       "       [ 5.8,  2.7,  5.1,  1.9],\n",
       "       [ 6.8,  3.2,  5.9,  2.3],\n",
       "       [ 6.7,  3.3,  5.7,  2.5],\n",
       "       [ 6.7,  3. ,  5.2,  2.3],\n",
       "       [ 6.3,  2.5,  5. ,  1.9],\n",
       "       [ 6.5,  3. ,  5.2,  2. ],\n",
       "       [ 6.2,  3.4,  5.4,  2.3],\n",
       "       [ 5.9,  3. ,  5.1,  1.8]])])"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris.values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_items([('DESCR', 'Iris Plants Database\\n\\nNotes\\n-----\\nData Set Characteristics:\\n    :Number of Instances: 150 (50 in each of three classes)\\n    :Number of Attributes: 4 numeric, predictive attributes and the class\\n    :Attribute Information:\\n        - sepal length in cm\\n        - sepal width in cm\\n        - petal length in cm\\n        - petal width in cm\\n        - class:\\n                - Iris-Setosa\\n                - Iris-Versicolour\\n                - Iris-Virginica\\n    :Summary Statistics:\\n\\n    ============== ==== ==== ======= ===== ====================\\n                    Min  Max   Mean    SD   Class Correlation\\n    ============== ==== ==== ======= ===== ====================\\n    sepal length:   4.3  7.9   5.84   0.83    0.7826\\n    sepal width:    2.0  4.4   3.05   0.43   -0.4194\\n    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\\n    petal width:    0.1  2.5   1.20  0.76     0.9565  (high!)\\n    ============== ==== ==== ======= ===== ====================\\n\\n    :Missing Attribute Values: None\\n    :Class Distribution: 33.3% for each of 3 classes.\\n    :Creator: R.A. Fisher\\n    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\\n    :Date: July, 1988\\n\\nThis is a copy of UCI ML iris datasets.\\nhttp://archive.ics.uci.edu/ml/datasets/Iris\\n\\nThe famous Iris database, first used by Sir R.A Fisher\\n\\nThis is perhaps the best known database to be found in the\\npattern recognition literature.  Fisher\\'s paper is a classic in the field and\\nis referenced frequently to this day.  (See Duda & Hart, for example.)  The\\ndata set contains 3 classes of 50 instances each, where each class refers to a\\ntype of iris plant.  One class is linearly separable from the other 2; the\\nlatter are NOT linearly separable from each other.\\n\\nReferences\\n----------\\n   - Fisher,R.A. \"The use of multiple measurements in taxonomic problems\"\\n     Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\\n     Mathematical Statistics\" (John Wiley, NY, 1950).\\n   - Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.\\n     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\\n   - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\\n     Structure and Classification Rule for Recognition in Partially Exposed\\n     Environments\".  IEEE Transactions on Pattern Analysis and Machine\\n     Intelligence, Vol. PAMI-2, No. 1, 67-71.\\n   - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\\n     on Information Theory, May 1972, 431-433.\\n   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\\n     conceptual clustering system finds 3 classes in the data.\\n   - Many, many more ...\\n'), ('target', array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])), ('feature_names', ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']), ('target_names', array(['setosa', 'versicolor', 'virginica'], \n",
       "      dtype='<U10')), ('data', array([[ 5.1,  3.5,  1.4,  0.2],\n",
       "       [ 4.9,  3. ,  1.4,  0.2],\n",
       "       [ 4.7,  3.2,  1.3,  0.2],\n",
       "       [ 4.6,  3.1,  1.5,  0.2],\n",
       "       [ 5. ,  3.6,  1.4,  0.2],\n",
       "       [ 5.4,  3.9,  1.7,  0.4],\n",
       "       [ 4.6,  3.4,  1.4,  0.3],\n",
       "       [ 5. ,  3.4,  1.5,  0.2],\n",
       "       [ 4.4,  2.9,  1.4,  0.2],\n",
       "       [ 4.9,  3.1,  1.5,  0.1],\n",
       "       [ 5.4,  3.7,  1.5,  0.2],\n",
       "       [ 4.8,  3.4,  1.6,  0.2],\n",
       "       [ 4.8,  3. ,  1.4,  0.1],\n",
       "       [ 4.3,  3. ,  1.1,  0.1],\n",
       "       [ 5.8,  4. ,  1.2,  0.2],\n",
       "       [ 5.7,  4.4,  1.5,  0.4],\n",
       "       [ 5.4,  3.9,  1.3,  0.4],\n",
       "       [ 5.1,  3.5,  1.4,  0.3],\n",
       "       [ 5.7,  3.8,  1.7,  0.3],\n",
       "       [ 5.1,  3.8,  1.5,  0.3],\n",
       "       [ 5.4,  3.4,  1.7,  0.2],\n",
       "       [ 5.1,  3.7,  1.5,  0.4],\n",
       "       [ 4.6,  3.6,  1. ,  0.2],\n",
       "       [ 5.1,  3.3,  1.7,  0.5],\n",
       "       [ 4.8,  3.4,  1.9,  0.2],\n",
       "       [ 5. ,  3. ,  1.6,  0.2],\n",
       "       [ 5. ,  3.4,  1.6,  0.4],\n",
       "       [ 5.2,  3.5,  1.5,  0.2],\n",
       "       [ 5.2,  3.4,  1.4,  0.2],\n",
       "       [ 4.7,  3.2,  1.6,  0.2],\n",
       "       [ 4.8,  3.1,  1.6,  0.2],\n",
       "       [ 5.4,  3.4,  1.5,  0.4],\n",
       "       [ 5.2,  4.1,  1.5,  0.1],\n",
       "       [ 5.5,  4.2,  1.4,  0.2],\n",
       "       [ 4.9,  3.1,  1.5,  0.1],\n",
       "       [ 5. ,  3.2,  1.2,  0.2],\n",
       "       [ 5.5,  3.5,  1.3,  0.2],\n",
       "       [ 4.9,  3.1,  1.5,  0.1],\n",
       "       [ 4.4,  3. ,  1.3,  0.2],\n",
       "       [ 5.1,  3.4,  1.5,  0.2],\n",
       "       [ 5. ,  3.5,  1.3,  0.3],\n",
       "       [ 4.5,  2.3,  1.3,  0.3],\n",
       "       [ 4.4,  3.2,  1.3,  0.2],\n",
       "       [ 5. ,  3.5,  1.6,  0.6],\n",
       "       [ 5.1,  3.8,  1.9,  0.4],\n",
       "       [ 4.8,  3. ,  1.4,  0.3],\n",
       "       [ 5.1,  3.8,  1.6,  0.2],\n",
       "       [ 4.6,  3.2,  1.4,  0.2],\n",
       "       [ 5.3,  3.7,  1.5,  0.2],\n",
       "       [ 5. ,  3.3,  1.4,  0.2],\n",
       "       [ 7. ,  3.2,  4.7,  1.4],\n",
       "       [ 6.4,  3.2,  4.5,  1.5],\n",
       "       [ 6.9,  3.1,  4.9,  1.5],\n",
       "       [ 5.5,  2.3,  4. ,  1.3],\n",
       "       [ 6.5,  2.8,  4.6,  1.5],\n",
       "       [ 5.7,  2.8,  4.5,  1.3],\n",
       "       [ 6.3,  3.3,  4.7,  1.6],\n",
       "       [ 4.9,  2.4,  3.3,  1. ],\n",
       "       [ 6.6,  2.9,  4.6,  1.3],\n",
       "       [ 5.2,  2.7,  3.9,  1.4],\n",
       "       [ 5. ,  2. ,  3.5,  1. ],\n",
       "       [ 5.9,  3. ,  4.2,  1.5],\n",
       "       [ 6. ,  2.2,  4. ,  1. ],\n",
       "       [ 6.1,  2.9,  4.7,  1.4],\n",
       "       [ 5.6,  2.9,  3.6,  1.3],\n",
       "       [ 6.7,  3.1,  4.4,  1.4],\n",
       "       [ 5.6,  3. ,  4.5,  1.5],\n",
       "       [ 5.8,  2.7,  4.1,  1. ],\n",
       "       [ 6.2,  2.2,  4.5,  1.5],\n",
       "       [ 5.6,  2.5,  3.9,  1.1],\n",
       "       [ 5.9,  3.2,  4.8,  1.8],\n",
       "       [ 6.1,  2.8,  4. ,  1.3],\n",
       "       [ 6.3,  2.5,  4.9,  1.5],\n",
       "       [ 6.1,  2.8,  4.7,  1.2],\n",
       "       [ 6.4,  2.9,  4.3,  1.3],\n",
       "       [ 6.6,  3. ,  4.4,  1.4],\n",
       "       [ 6.8,  2.8,  4.8,  1.4],\n",
       "       [ 6.7,  3. ,  5. ,  1.7],\n",
       "       [ 6. ,  2.9,  4.5,  1.5],\n",
       "       [ 5.7,  2.6,  3.5,  1. ],\n",
       "       [ 5.5,  2.4,  3.8,  1.1],\n",
       "       [ 5.5,  2.4,  3.7,  1. ],\n",
       "       [ 5.8,  2.7,  3.9,  1.2],\n",
       "       [ 6. ,  2.7,  5.1,  1.6],\n",
       "       [ 5.4,  3. ,  4.5,  1.5],\n",
       "       [ 6. ,  3.4,  4.5,  1.6],\n",
       "       [ 6.7,  3.1,  4.7,  1.5],\n",
       "       [ 6.3,  2.3,  4.4,  1.3],\n",
       "       [ 5.6,  3. ,  4.1,  1.3],\n",
       "       [ 5.5,  2.5,  4. ,  1.3],\n",
       "       [ 5.5,  2.6,  4.4,  1.2],\n",
       "       [ 6.1,  3. ,  4.6,  1.4],\n",
       "       [ 5.8,  2.6,  4. ,  1.2],\n",
       "       [ 5. ,  2.3,  3.3,  1. ],\n",
       "       [ 5.6,  2.7,  4.2,  1.3],\n",
       "       [ 5.7,  3. ,  4.2,  1.2],\n",
       "       [ 5.7,  2.9,  4.2,  1.3],\n",
       "       [ 6.2,  2.9,  4.3,  1.3],\n",
       "       [ 5.1,  2.5,  3. ,  1.1],\n",
       "       [ 5.7,  2.8,  4.1,  1.3],\n",
       "       [ 6.3,  3.3,  6. ,  2.5],\n",
       "       [ 5.8,  2.7,  5.1,  1.9],\n",
       "       [ 7.1,  3. ,  5.9,  2.1],\n",
       "       [ 6.3,  2.9,  5.6,  1.8],\n",
       "       [ 6.5,  3. ,  5.8,  2.2],\n",
       "       [ 7.6,  3. ,  6.6,  2.1],\n",
       "       [ 4.9,  2.5,  4.5,  1.7],\n",
       "       [ 7.3,  2.9,  6.3,  1.8],\n",
       "       [ 6.7,  2.5,  5.8,  1.8],\n",
       "       [ 7.2,  3.6,  6.1,  2.5],\n",
       "       [ 6.5,  3.2,  5.1,  2. ],\n",
       "       [ 6.4,  2.7,  5.3,  1.9],\n",
       "       [ 6.8,  3. ,  5.5,  2.1],\n",
       "       [ 5.7,  2.5,  5. ,  2. ],\n",
       "       [ 5.8,  2.8,  5.1,  2.4],\n",
       "       [ 6.4,  3.2,  5.3,  2.3],\n",
       "       [ 6.5,  3. ,  5.5,  1.8],\n",
       "       [ 7.7,  3.8,  6.7,  2.2],\n",
       "       [ 7.7,  2.6,  6.9,  2.3],\n",
       "       [ 6. ,  2.2,  5. ,  1.5],\n",
       "       [ 6.9,  3.2,  5.7,  2.3],\n",
       "       [ 5.6,  2.8,  4.9,  2. ],\n",
       "       [ 7.7,  2.8,  6.7,  2. ],\n",
       "       [ 6.3,  2.7,  4.9,  1.8],\n",
       "       [ 6.7,  3.3,  5.7,  2.1],\n",
       "       [ 7.2,  3.2,  6. ,  1.8],\n",
       "       [ 6.2,  2.8,  4.8,  1.8],\n",
       "       [ 6.1,  3. ,  4.9,  1.8],\n",
       "       [ 6.4,  2.8,  5.6,  2.1],\n",
       "       [ 7.2,  3. ,  5.8,  1.6],\n",
       "       [ 7.4,  2.8,  6.1,  1.9],\n",
       "       [ 7.9,  3.8,  6.4,  2. ],\n",
       "       [ 6.4,  2.8,  5.6,  2.2],\n",
       "       [ 6.3,  2.8,  5.1,  1.5],\n",
       "       [ 6.1,  2.6,  5.6,  1.4],\n",
       "       [ 7.7,  3. ,  6.1,  2.3],\n",
       "       [ 6.3,  3.4,  5.6,  2.4],\n",
       "       [ 6.4,  3.1,  5.5,  1.8],\n",
       "       [ 6. ,  3. ,  4.8,  1.8],\n",
       "       [ 6.9,  3.1,  5.4,  2.1],\n",
       "       [ 6.7,  3.1,  5.6,  2.4],\n",
       "       [ 6.9,  3.1,  5.1,  2.3],\n",
       "       [ 5.8,  2.7,  5.1,  1.9],\n",
       "       [ 6.8,  3.2,  5.9,  2.3],\n",
       "       [ 6.7,  3.3,  5.7,  2.5],\n",
       "       [ 6.7,  3. ,  5.2,  2.3],\n",
       "       [ 6.3,  2.5,  5. ,  1.9],\n",
       "       [ 6.5,  3. ,  5.2,  2. ],\n",
       "       [ 6.2,  3.4,  5.4,  2.3],\n",
       "       [ 5.9,  3. ,  5.1,  1.8]]))])"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris.items()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Also see http://nbviewer.jupyter.org/github/ogrisel/parallel_ml_tutorial/blob/master/rendered_notebooks/04%20-%20Pandas%20and%20Heterogeneous%20Data%20Modeling.ipynb"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}