Skip to content

Instantly share code, notes, and snippets.

@Farheen2302
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Farheen2302/b4ad18f34ca40ba875a2 to your computer and use it in GitHub Desktop.
Save Farheen2302/b4ad18f34ca40ba875a2 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os as os"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/home/farheen'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.getcwd()  # show the current working directory"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['Data_An30_05_15~',\n",
" 'Cheat_Sheet',\n",
" 'Mail',\n",
" 'NetBeansProjects',\n",
" 'day_27_06.txt~',\n",
" '.gphoto',\n",
" 'MyBlogs',\n",
" 'GuidelineZAPExtensionsAddOns1.0.pdf',\n",
" '.rstudio-desktop',\n",
" 'p.cpp~',\n",
" '.tuxtype',\n",
" 'examples.desktop',\n",
" 'Rcpp_prog2.cpp~',\n",
" 'Cheat_Sheet1',\n",
" 'Rccp14.cpp',\n",
" 'pa.odp_1odp',\n",
" 'newContacts.txt',\n",
" 'eclipse-standard-luna-R-linux-gtk-x86_64.tar.gz',\n",
" '.hex-a-hop',\n",
" 'linux-3.18.9',\n",
" 'new_del.c~',\n",
" 'quiz.txt',\n",
" 'Student.txt~',\n",
" 'JCatalog',\n",
" 'learning',\n",
" 'Music',\n",
" 'R_29_05~',\n",
" 'encryption.c~',\n",
" '.thunderbird',\n",
" 'R_learning.txt~',\n",
" 'Downloads',\n",
" 'apt_c.c~',\n",
" '.bash_history',\n",
" 'GHA_essay~',\n",
" '.~lock.rapgod.odt#',\n",
" 'c_test.c~',\n",
" 'Rcpp14.cpp',\n",
" 'rprog-015_Principles_of_Analytic_Graphs.txt',\n",
" 'c_test.c',\n",
" '.subversion',\n",
" '.muttrc',\n",
" '.Xauthority',\n",
" 'edx1',\n",
" 'Untitled8.ipynb',\n",
" 'ruby',\n",
" 'luljmi1',\n",
" '.npm',\n",
" 'cc.c~',\n",
" '.selected_editor',\n",
" 'largest_Number.c~',\n",
" 'prac_tem.cpp~',\n",
" '.gnome2_private',\n",
" '.profile',\n",
" 'Rcpp17.cpp',\n",
" '.Xauthority.LOW5JX',\n",
" '.rnd',\n",
" 'High_perf_R_1_06',\n",
" 'rprog-015_Missing_Values.txt',\n",
" 'Pictures',\n",
" 'xdman.jar',\n",
" '.fetchmailrc',\n",
" 'Pd_Excel.xlsx',\n",
" 'private',\n",
" 'Videos',\n",
" 'Pandas_post(II)_7_06.ipynb',\n",
" 'Data_An30_05_15',\n",
" 'df.txt~',\n",
" '.msmtprc~',\n",
" 'octave-workspace',\n",
" 'primes.c~',\n",
" 'prac_file.cpp~',\n",
" 'subcript_ovr.cpp~',\n",
" 'Pandas3_06_6.ipynb',\n",
" 'ghc',\n",
" 'tar',\n",
" '.briquolo',\n",
" 'Untitled2.ipynb',\n",
" '.cf',\n",
" 'untitled_1odt',\n",
" 'day_28_05~',\n",
" '.PlayOnLinux',\n",
" 'Untitled1.ipynb',\n",
" 'Documents',\n",
" 'canopy',\n",
" 'Templates',\n",
" 'df.txt',\n",
" 'window.c~',\n",
" 'sulluhead.h',\n",
" 'day_27_06.txt',\n",
" 'xdman.zip',\n",
" 'httpd.conf',\n",
" 'Untitled3.ipynb',\n",
" 'VirtualBox VMs',\n",
" 'asm 5.9.25 x86',\n",
" 'prac_op.cpp~',\n",
" 'c_test.c~~',\n",
" '.xsession-errors.old',\n",
" 'rattle_blog',\n",
" 'blog_27_06',\n",
" 'Analytics_edge_cmmnd',\n",
" 'oops_sh',\n",
" 'my_server.py~',\n",
" 'DataStatsPy',\n",
" '#include <iostream.h>',\n",
" '.ipynb_checkpoints',\n",
" 'day_28_05',\n",
" 'pp2.py',\n",
" 'Untitled.ipynb',\n",
" '.java',\n",
" '.bashrc~',\n",
" '.barrage.hscr',\n",
" 'Student.txt',\n",
" '.RData',\n",
" 'qq15',\n",
" 'R_file_26_06',\n",
" '.octave_hist',\n",
" 'Untitled5.ipynb',\n",
" 'pandas3_9_06.ipynb',\n",
" 'git',\n",
" 'firefox',\n",
" 'ftp%20shahjahan.pptx_0odp',\n",
" 'interview_8_06.txt',\n",
" 'libpng-1.5.15.tar.gz',\n",
" 'Untitled10.ipynb',\n",
" 'icon.png',\n",
" '.gnome',\n",
" 'intern.ods_0ods',\n",
" '.enstaller4rc',\n",
" 'SystemExamp.c~',\n",
" '.esmtprc',\n",
" 'matcheck.txt~',\n",
" 'quiz.txt~',\n",
" 'httpd-2.4.10.tar',\n",
" 'anaconda',\n",
" '.ipython',\n",
" 'trans.c~',\n",
" 'Data_summer.ipynb',\n",
" 'DataConversion3.cpp~',\n",
" 'vmware',\n",
" '.gvfs',\n",
" 'desktop',\n",
" 'comp~',\n",
" 'FarheenNilofer.compressed.pdf',\n",
" 'luljmi1.2',\n",
" 'Rcpp_prog5.cpp~',\n",
" 'Q19.docx',\n",
" 'untitled_0ods',\n",
" 'Untitled4.ipynb',\n",
" '.xsession-errors',\n",
" 'Untitled12.ipynb',\n",
" 'ourbootloader.asm',\n",
" '.Xauthority.734IKX',\n",
" 'q',\n",
" 'Rcpp_prog8.cpp~',\n",
" 'largest_Number.c',\n",
" 'R_29_05',\n",
" '.gnome2',\n",
" 'boot.bin',\n",
" 'dlt.c~',\n",
" 'sp.c~',\n",
" 'xdm-helper',\n",
" 'pandas_1_06.ipynb',\n",
" 'my_client.py~',\n",
" 'a.out',\n",
" 'A_TimeCm_2_06',\n",
" 'anti.c~',\n",
" 'Canopy',\n",
" 'lesson5_8_06.ipynb',\n",
" '.gksu.lock',\n",
" 'question_cc',\n",
" 'rmcdr_input.csv',\n",
" 'GHA_essay',\n",
" '.wine',\n",
" 'dp1.c~',\n",
" 'RCPP_Code',\n",
" 'comp',\n",
" '.pki',\n",
" 'lords.txt',\n",
" '.Xauthority.O5IGKX',\n",
" 'usr',\n",
" 'oops_sh~',\n",
" 'learningR_7_06',\n",
" '.viminfo',\n",
" 'https:',\n",
" 'rough.',\n",
" 'prac_line.cpp~',\n",
" 'PASSPORT',\n",
" 'lords.txt~',\n",
" 'lul_lab3.ppt_0odp',\n",
" 'WHO_Europe.csv',\n",
" 'c_test2.c',\n",
" '.git-credential-cache',\n",
" '.netbeans-derby',\n",
" 'Untitled11.ipynb',\n",
" 'prac_compConver.cpp~',\n",
" '.xinputrc',\n",
" 'color_check.c~',\n",
" 'DataAnalysis_withPython2.ipynb',\n",
" 'Rcpp_prog10.cpp~',\n",
" 'CR1_W1_D1',\n",
" 'Coursera_Docs',\n",
" 'diamonds-data',\n",
" '.dbus',\n",
" 'Pandas2_2_06.ipynb',\n",
" 'Desktop',\n",
" 'aafever',\n",
" 'sp.c',\n",
" 'Pandas_1_06.ipynb',\n",
" '.bash_logout',\n",
" '.continuum',\n",
" '.netbeans',\n",
" 'man',\n",
" 'feminism.doc_0odt',\n",
" 'pandas_les4_8_06.ipynb',\n",
" 'LINE.txt~',\n",
" 'jdk1.7.0_51',\n",
" 'qq15.o',\n",
" '.macromedia',\n",
" 'primes_effi.c~',\n",
" 'shr',\n",
" 'DataAnalysis',\n",
" 'httpd-2.4.10',\n",
" 'R_maths',\n",
" 'bin',\n",
" '.msmtprc',\n",
" 'your_new_directory',\n",
" '.ICEauthority',\n",
" '.Xauthority.SHQIKX',\n",
" '.msmtprc~~',\n",
" 'nsnake.c~',\n",
" 'venv',\n",
" 'SHA1Sum',\n",
" 'Untitled7.ipynb',\n",
" 'count_digit.c~',\n",
" 'Pandas_post(II)_7_06-Copy1.ipynb',\n",
" 'filehand.cpp~',\n",
" 'Kali-Linux-1.1.0c-vm-amd64',\n",
" '.compiz',\n",
" 'blog',\n",
" 'Learning%20Path_Interns%202015.xlsx_0ods',\n",
" 'pa_1.odp_1odp',\n",
" 'lul2.ppt_0odp',\n",
" 'luljmi1.1',\n",
" 'Rcpp_prog3.cpp~',\n",
" 'python_27_05.ipynb',\n",
" 'AcrobatReader-6.0.1.exe',\n",
" 'ut',\n",
" 'temp',\n",
" '.vimrc',\n",
" 'Rcpp_prog6.cpp~',\n",
" 'how_to_use.txt',\n",
" 'Enthought',\n",
" '.esmtprc~',\n",
" 'oop_doc.docx',\n",
" 'ipython-3.1.0',\n",
" 'shr~',\n",
" 'new_del.cpp~',\n",
" 'rattle_blog.odt_1odt',\n",
" 'Untitled6.ipynb',\n",
" 'decStat_web',\n",
" '.mozilla',\n",
" 'apt_c.c',\n",
" '.vmware',\n",
" 'MAIL%20Server%20Presentation%20FInal.ppt_0odp',\n",
" '.canopy',\n",
" 'pyth.py~',\n",
" '.Xauthority.GA57JX',\n",
" 'Public',\n",
" 'workspace',\n",
" '.local',\n",
" 'Linux Lab',\n",
" 'ZAP_Understanding',\n",
" '.cache',\n",
" 'blog_27_06~',\n",
" 'eclipse',\n",
" 'datasciencecoursera',\n",
" '.config',\n",
" 'apr-util-1.5.3.tar',\n",
" 'R_learning.txt',\n",
" 'ipython-3.1.0-py2-none-any.whl#md5=c9fb71b20ca7223649a57a4b965588a5',\n",
" 'prac_inherit.cpp~',\n",
" '.dmrc',\n",
" 'SnakeAndLadderGame',\n",
" 'DataCoversion1.cpp~',\n",
" 'r',\n",
" '.Rhistory',\n",
" 'Rcpp_prog7.cpp~',\n",
" 'demo.c~',\n",
" 'Untitled9.ipynb',\n",
" '.bashrc',\n",
" 'dead.letter',\n",
" '.procmailrc',\n",
" '.gitconfig',\n",
" '.icons',\n",
" 'DecisionStats',\n",
" 'feamingham.R',\n",
" 'LINE.txt',\n",
" 'README.md',\n",
" '.msmtp.log',\n",
" 'hello.c~',\n",
" 'mysql-connector-java-5.1.33',\n",
" 'ch.txt',\n",
" 'tmp',\n",
" 'untitled-1',\n",
" '.muttrc~',\n",
" 'line.txt',\n",
" '.bootloader.asm.swp',\n",
" 'Rcpp_prog1.cpp~',\n",
" 'Untitled 1.odp',\n",
" 'prime.txt',\n",
" '.gconf',\n",
" 'plot1.png',\n",
" 'Rcpp_prog4.cpp~',\n",
" 'R',\n",
" 'prac_copy.cpp~',\n",
" 'jdk-7u51-linux-x64.tar.gz',\n",
" 'Rcpp_prog9.cpp~',\n",
" 'ISO',\n",
" '.git',\n",
" '0',\n",
" 'Result.txt',\n",
" 'c_test2.c~',\n",
" '.bashrc-anaconda.bak',\n",
" 'CE',\n",
" 'pascal.c~',\n",
" '.procmaillog',\n",
" '.gstreamer-0.10',\n",
" '.Xauthority.VDUJKX',\n",
" '.mysql_history']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.listdir(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"a = os.getcwd()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'/home/farheen'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Build the CSV path from the current user's home directory instead of\n",
"# hardcoding /home/farheen, so the notebook also runs under another account.\n",
"csv_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'BigDiamonds.csv')\n",
"bigdiamonds = pd.read_csv(csv_path)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 598024 entries, 0 to 598023\n",
"Data columns (total 12 columns):\n",
"carat 598024 non-null float64\n",
"cut 598024 non-null object\n",
"color 598024 non-null object\n",
"clarity 598024 non-null object\n",
"table 598024 non-null float64\n",
"depth 598024 non-null float64\n",
"cert 598024 non-null object\n",
"measurements 597978 non-null object\n",
"price 597311 non-null float64\n",
"x 596209 non-null float64\n",
"y 596172 non-null float64\n",
"z 595480 non-null float64\n",
"dtypes: float64(7), object(5)\n",
"memory usage: 59.3+ MB\n"
]
}
],
"source": [
"bigdiamonds.info()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.25</td>\n",
" <td>V.Good</td>\n",
" <td>K</td>\n",
" <td>I1</td>\n",
" <td>59</td>\n",
" <td>63.7</td>\n",
" <td>GIA</td>\n",
" <td>3.96 x 3.95 x 2.52</td>\n",
" <td>NaN</td>\n",
" <td>3.96</td>\n",
" <td>3.95</td>\n",
" <td>2.52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>G</td>\n",
" <td>I1</td>\n",
" <td>61</td>\n",
" <td>58.1</td>\n",
" <td>GIA</td>\n",
" <td>4.00 x 4.05 x 2.30</td>\n",
" <td>NaN</td>\n",
" <td>4.00</td>\n",
" <td>4.05</td>\n",
" <td>2.30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.34</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>I2</td>\n",
" <td>58</td>\n",
" <td>58.7</td>\n",
" <td>GIA</td>\n",
" <td>4.56 x 4.53 x 2.67</td>\n",
" <td>NaN</td>\n",
" <td>4.56</td>\n",
" <td>4.53</td>\n",
" <td>2.67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.21</td>\n",
" <td>V.Good</td>\n",
" <td>D</td>\n",
" <td>I1</td>\n",
" <td>60</td>\n",
" <td>60.6</td>\n",
" <td>GIA</td>\n",
" <td>3.80 x 3.82 x 2.31</td>\n",
" <td>NaN</td>\n",
" <td>3.80</td>\n",
" <td>3.82</td>\n",
" <td>2.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" <td>K</td>\n",
" <td>I1</td>\n",
" <td>59</td>\n",
" <td>62.2</td>\n",
" <td>EGL</td>\n",
" <td>4.35 x 4.26 x 2.68</td>\n",
" <td>NaN</td>\n",
" <td>4.35</td>\n",
" <td>4.26</td>\n",
" <td>2.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" <td>G</td>\n",
" <td>SI2</td>\n",
" <td>60</td>\n",
" <td>64.4</td>\n",
" <td>GIA</td>\n",
" <td>3.74 x 3.67 x 2.38</td>\n",
" <td>NaN</td>\n",
" <td>3.74</td>\n",
" <td>3.67</td>\n",
" <td>2.38</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity table depth cert measurements price \\\n",
"0 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NaN \n",
"1 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NaN \n",
"2 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NaN \n",
"3 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NaN \n",
"4 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NaN \n",
"5 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NaN \n",
"\n",
" x y z \n",
"0 3.96 3.95 2.52 \n",
"1 4.00 4.05 2.30 \n",
"2 4.56 4.53 2.67 \n",
"3 3.80 3.82 2.31 \n",
"4 4.35 4.26 2.68 \n",
"5 3.74 3.67 2.38 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bigdiamonds.head(6)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>598014</th>\n",
" <td>5.24</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>SI1</td>\n",
" <td>60.0</td>\n",
" <td>59.5</td>\n",
" <td>GIA</td>\n",
" <td>11.35 x 11.43 x 6.78</td>\n",
" <td>99910</td>\n",
" <td>11.35</td>\n",
" <td>11.43</td>\n",
" <td>6.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598015</th>\n",
" <td>5.03</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>SI1</td>\n",
" <td>58.0</td>\n",
" <td>62.2</td>\n",
" <td>HRD</td>\n",
" <td>6.82 x 10.94 x 10.98</td>\n",
" <td>99913</td>\n",
" <td>6.82</td>\n",
" <td>10.94</td>\n",
" <td>10.98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598016</th>\n",
" <td>3.05</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>VS2</td>\n",
" <td>59.0</td>\n",
" <td>61.3</td>\n",
" <td>GIA</td>\n",
" <td>5.73 x 9.33 x 9.36</td>\n",
" <td>99916</td>\n",
" <td>5.73</td>\n",
" <td>9.33</td>\n",
" <td>9.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598017</th>\n",
" <td>3.01</td>\n",
" <td>Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>61.0</td>\n",
" <td>62.6</td>\n",
" <td>GIA</td>\n",
" <td>9.16 x 9.25 x 5.76</td>\n",
" <td>99920</td>\n",
" <td>9.16</td>\n",
" <td>9.25</td>\n",
" <td>5.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598018</th>\n",
" <td>3.01</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>VS2</td>\n",
" <td>58.0</td>\n",
" <td>62.0</td>\n",
" <td>GIA</td>\n",
" <td>9.25 x 9.2 x 5.72</td>\n",
" <td>99920</td>\n",
" <td>9.25</td>\n",
" <td>9.20</td>\n",
" <td>5.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598019</th>\n",
" <td>3.02</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VVS2</td>\n",
" <td>58.0</td>\n",
" <td>59.8</td>\n",
" <td>HRD</td>\n",
" <td>9.43 x 9.51 x 5.66</td>\n",
" <td>99930</td>\n",
" <td>9.43</td>\n",
" <td>9.51</td>\n",
" <td>5.66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598020</th>\n",
" <td>5.01</td>\n",
" <td>V.Good</td>\n",
" <td>I</td>\n",
" <td>VVS2</td>\n",
" <td>63.5</td>\n",
" <td>61.5</td>\n",
" <td>IGI</td>\n",
" <td>10.78 x 10.89 x 6.68</td>\n",
" <td>99942</td>\n",
" <td>10.78</td>\n",
" <td>10.89</td>\n",
" <td>6.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598021</th>\n",
" <td>3.43</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>VS2</td>\n",
" <td>54.0</td>\n",
" <td>62.7</td>\n",
" <td>GIA</td>\n",
" <td>9.66 x 9.61 x 6.05</td>\n",
" <td>99960</td>\n",
" <td>9.66</td>\n",
" <td>9.61</td>\n",
" <td>6.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598022</th>\n",
" <td>3.01</td>\n",
" <td>V.Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>58.0</td>\n",
" <td>62.9</td>\n",
" <td>GIA</td>\n",
" <td>9.15 x 9.19 x 5.77</td>\n",
" <td>99966</td>\n",
" <td>9.15</td>\n",
" <td>9.19</td>\n",
" <td>5.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598023</th>\n",
" <td>4.13</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>IF</td>\n",
" <td>56.0</td>\n",
" <td>62.5</td>\n",
" <td>IGI</td>\n",
" <td>10.27 x 10.19 x 6.4</td>\n",
" <td>99990</td>\n",
" <td>10.27</td>\n",
" <td>10.19</td>\n",
" <td>6.40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity table depth cert \\\n",
"598014 5.24 Ideal I SI1 60.0 59.5 GIA \n",
"598015 5.03 Ideal H SI1 58.0 62.2 HRD \n",
"598016 3.05 Ideal D VS2 59.0 61.3 GIA \n",
"598017 3.01 Good E VS1 61.0 62.6 GIA \n",
"598018 3.01 Ideal D VS2 58.0 62.0 GIA \n",
"598019 3.02 Ideal E VVS2 58.0 59.8 HRD \n",
"598020 5.01 V.Good I VVS2 63.5 61.5 IGI \n",
"598021 3.43 Ideal F VS2 54.0 62.7 GIA \n",
"598022 3.01 V.Good E VS1 58.0 62.9 GIA \n",
"598023 4.13 Ideal H IF 56.0 62.5 IGI \n",
"\n",
" measurements price x y z \n",
"598014 11.35 x 11.43 x 6.78 99910 11.35 11.43 6.78 \n",
"598015 6.82 x 10.94 x 10.98 99913 6.82 10.94 10.98 \n",
"598016 5.73 x 9.33 x 9.36 99916 5.73 9.33 9.36 \n",
"598017 9.16 x 9.25 x 5.76 99920 9.16 9.25 5.76 \n",
"598018 9.25 x 9.2 x 5.72 99920 9.25 9.20 5.72 \n",
"598019 9.43 x 9.51 x 5.66 99930 9.43 9.51 5.66 \n",
"598020 10.78 x 10.89 x 6.68 99942 10.78 10.89 6.68 \n",
"598021 9.66 x 9.61 x 6.05 99960 9.66 9.61 6.05 \n",
"598022 9.15 x 9.19 x 5.77 99966 9.15 9.19 5.77 \n",
"598023 10.27 x 10.19 x 6.4 99990 10.27 10.19 6.40 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bigdiamonds.tail(10)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"598024"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b=len(bigdiamonds)\n",
"b"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The row labels of the frame are available as `bigdiamonds.index.values`; the next cell displays them."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, ..., 598021, 598022, 598023])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bigdiamonds.index.values"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Draw 0.01% of the row labels at random (np.random.choice samples with\n",
"# replacement by default). The size argument must be an integer: passing the\n",
"# float 0.0001*b raises a TypeError on current NumPy releases.\n",
"sample_size = int(0.0001 * b)\n",
"rows = np.random.choice(bigdiamonds.index.values, size=sample_size)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# rows holds index labels drawn from bigdiamonds.index.values, so select them\n",
"# with the label-based .loc indexer (DataFrame.ix was removed in pandas 1.0).\n",
"sample_df = bigdiamonds.loc[rows]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>305469</th>\n",
" <td>0.80</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>56.0</td>\n",
" <td>62.6</td>\n",
" <td>GIA</td>\n",
" <td>5.94 x 5.92 x 3.72</td>\n",
" <td>3660</td>\n",
" <td>5.94</td>\n",
" <td>5.92</td>\n",
" <td>3.72</td>\n",
" </tr>\n",
" <tr>\n",
" <th>307801</th>\n",
" <td>1.09</td>\n",
" <td>V.Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>57.0</td>\n",
" <td>63.3</td>\n",
" <td>GIA</td>\n",
" <td>6.49 x 6.51 x 4.11</td>\n",
" <td>3720</td>\n",
" <td>6.49</td>\n",
" <td>6.51</td>\n",
" <td>4.11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133444</th>\n",
" <td>0.50</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>I1</td>\n",
" <td>56.0</td>\n",
" <td>60.9</td>\n",
" <td>GIA</td>\n",
" <td>5.15 x 5.18 x 3.15</td>\n",
" <td>1090</td>\n",
" <td>5.15</td>\n",
" <td>5.18</td>\n",
" <td>3.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240781</th>\n",
" <td>0.80</td>\n",
" <td>Ideal</td>\n",
" <td>J</td>\n",
" <td>VVS1</td>\n",
" <td>55.5</td>\n",
" <td>62.8</td>\n",
" <td>IGI</td>\n",
" <td>5.88 x 5.93 x 3.71</td>\n",
" <td>2258</td>\n",
" <td>5.88</td>\n",
" <td>5.93</td>\n",
" <td>3.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>490954</th>\n",
" <td>2.48</td>\n",
" <td>V.Good</td>\n",
" <td>H</td>\n",
" <td>SI2</td>\n",
" <td>62.0</td>\n",
" <td>59.4</td>\n",
" <td>IGI</td>\n",
" <td>8.78 x 8.70 x 5.22</td>\n",
" <td>19712</td>\n",
" <td>8.78</td>\n",
" <td>8.70</td>\n",
" <td>5.22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6957</th>\n",
" <td>0.30</td>\n",
" <td>Good</td>\n",
" <td>I</td>\n",
" <td>SI2</td>\n",
" <td>62.0</td>\n",
" <td>57.2</td>\n",
" <td>GIA</td>\n",
" <td>4.42 x 4.44 x 2.54</td>\n",
" <td>463</td>\n",
" <td>4.42</td>\n",
" <td>4.44</td>\n",
" <td>2.54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>298617</th>\n",
" <td>0.80</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>VS1</td>\n",
" <td>56.0</td>\n",
" <td>62.7</td>\n",
" <td>GIA</td>\n",
" <td>5.88 x 5.93 x 3.70</td>\n",
" <td>3486</td>\n",
" <td>5.88</td>\n",
" <td>5.93</td>\n",
" <td>3.70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399801</th>\n",
" <td>1.26</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>VVS1</td>\n",
" <td>60.0</td>\n",
" <td>59.2</td>\n",
" <td>GIA</td>\n",
" <td>7.03 x 7.07 x 4.18</td>\n",
" <td>7142</td>\n",
" <td>7.03</td>\n",
" <td>7.07</td>\n",
" <td>4.18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>527188</th>\n",
" <td>1.62</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>IF</td>\n",
" <td>54.0</td>\n",
" <td>62.3</td>\n",
" <td>GIA</td>\n",
" <td>4.69 x 7.51 x 7.56</td>\n",
" <td>13730</td>\n",
" <td>4.69</td>\n",
" <td>7.51</td>\n",
" <td>7.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22885</th>\n",
" <td>0.51</td>\n",
" <td>Ideal</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>59.0</td>\n",
" <td>65.1</td>\n",
" <td>EGL</td>\n",
" <td>4.96 x 5.00 x 3.24</td>\n",
" <td>575</td>\n",
" <td>4.96</td>\n",
" <td>5.00</td>\n",
" <td>3.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>451110</th>\n",
" <td>2.24</td>\n",
" <td>V.Good</td>\n",
" <td>F</td>\n",
" <td>SI2</td>\n",
" <td>62.0</td>\n",
" <td>58.7</td>\n",
" <td>EGL Intl.</td>\n",
" <td>8.44 x 8.53 x 4.99</td>\n",
" <td>11480</td>\n",
" <td>8.44</td>\n",
" <td>8.53</td>\n",
" <td>4.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>545225</th>\n",
" <td>1.51</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>60.3</td>\n",
" <td>GIA</td>\n",
" <td>7.48 x 7.44 x 4.50</td>\n",
" <td>16648</td>\n",
" <td>7.48</td>\n",
" <td>7.44</td>\n",
" <td>4.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33912</th>\n",
" <td>0.30</td>\n",
" <td>V.Good</td>\n",
" <td>G</td>\n",
" <td>SI2</td>\n",
" <td>57.0</td>\n",
" <td>61.3</td>\n",
" <td>GIA</td>\n",
" <td>4.35 x 4.32 x 2.66</td>\n",
" <td>630</td>\n",
" <td>4.35</td>\n",
" <td>4.32</td>\n",
" <td>2.66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74245</th>\n",
" <td>0.33</td>\n",
" <td>Good</td>\n",
" <td>D</td>\n",
" <td>VS1</td>\n",
" <td>63.0</td>\n",
" <td>60.2</td>\n",
" <td>GIA</td>\n",
" <td>2.72 x 4.5 x 4.54</td>\n",
" <td>788</td>\n",
" <td>2.72</td>\n",
" <td>4.50</td>\n",
" <td>4.54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>282376</th>\n",
" <td>1.20</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>SI2</td>\n",
" <td>55.5</td>\n",
" <td>67.5</td>\n",
" <td>IGI</td>\n",
" <td>6.52 x 6.5 x 4.39</td>\n",
" <td>3110</td>\n",
" <td>6.52</td>\n",
" <td>6.50</td>\n",
" <td>4.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>391287</th>\n",
" <td>1.09</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>SI1</td>\n",
" <td>57.0</td>\n",
" <td>62.8</td>\n",
" <td>GIA</td>\n",
" <td>6.56 x 6.59 x 4.13</td>\n",
" <td>6683</td>\n",
" <td>6.56</td>\n",
" <td>6.59</td>\n",
" <td>4.13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>249409</th>\n",
" <td>0.70</td>\n",
" <td>Good</td>\n",
" <td>I</td>\n",
" <td>VS1</td>\n",
" <td>54.0</td>\n",
" <td>64.1</td>\n",
" <td>GIA</td>\n",
" <td>5.60 x 5.56 x 3.58</td>\n",
" <td>2374</td>\n",
" <td>5.60</td>\n",
" <td>5.56</td>\n",
" <td>3.58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>558075</th>\n",
" <td>3.01</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>SI2</td>\n",
" <td>57.0</td>\n",
" <td>62.1</td>\n",
" <td>IGI</td>\n",
" <td>9.24 x 9.21 x 5.73</td>\n",
" <td>19790</td>\n",
" <td>9.24</td>\n",
" <td>9.21</td>\n",
" <td>5.73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69016</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" <td>G</td>\n",
" <td>VS1</td>\n",
" <td>60.5</td>\n",
" <td>60.1</td>\n",
" <td>IGI</td>\n",
" <td>4.30 x 4.37 x 2.60</td>\n",
" <td>770</td>\n",
" <td>4.30</td>\n",
" <td>4.37</td>\n",
" <td>2.60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>415138</th>\n",
" <td>1.20</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>VVS2</td>\n",
" <td>58.0</td>\n",
" <td>62.7</td>\n",
" <td>GIA</td>\n",
" <td>6.76 x 6.73 x 4.23</td>\n",
" <td>8093</td>\n",
" <td>6.76</td>\n",
" <td>6.73</td>\n",
" <td>4.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>491072</th>\n",
" <td>1.72</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>61.9</td>\n",
" <td>GIA</td>\n",
" <td>7.64 x 7.69 x 4.74</td>\n",
" <td>19750</td>\n",
" <td>7.64</td>\n",
" <td>7.69</td>\n",
" <td>4.74</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596386</th>\n",
" <td>3.91</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>VS1</td>\n",
" <td>57.0</td>\n",
" <td>62.3</td>\n",
" <td>HRD</td>\n",
" <td>9.98 x 10.06 x 6.25</td>\n",
" <td>80802</td>\n",
" <td>9.98</td>\n",
" <td>10.06</td>\n",
" <td>6.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>294857</th>\n",
" <td>0.92</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>SI2</td>\n",
" <td>58.0</td>\n",
" <td>61.0</td>\n",
" <td>GIA</td>\n",
" <td>6.25 x 6.27 x 3.82</td>\n",
" <td>3395</td>\n",
" <td>6.25</td>\n",
" <td>6.27</td>\n",
" <td>3.82</td>\n",
" </tr>\n",
" <tr>\n",
" <th>363102</th>\n",
" <td>0.90</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>56.0</td>\n",
" <td>61.6</td>\n",
" <td>GIA</td>\n",
" <td>6.21 x 6.17 x 0.00</td>\n",
" <td>5441</td>\n",
" <td>6.21</td>\n",
" <td>6.17</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>563954</th>\n",
" <td>1.78</td>\n",
" <td>Ideal</td>\n",
" <td>I</td>\n",
" <td>VVS1</td>\n",
" <td>56.0</td>\n",
" <td>62.0</td>\n",
" <td>GIA</td>\n",
" <td>7.72 x 7.76 x 4.80</td>\n",
" <td>21818</td>\n",
" <td>7.72</td>\n",
" <td>7.76</td>\n",
" <td>4.80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30781</th>\n",
" <td>0.21</td>\n",
" <td>Good</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>63.9</td>\n",
" <td>GIA</td>\n",
" <td>3.73 x 3.78 x 2.40</td>\n",
" <td>615</td>\n",
" <td>3.73</td>\n",
" <td>3.78</td>\n",
" <td>2.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>186068</th>\n",
" <td>0.47</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VVS2</td>\n",
" <td>59.0</td>\n",
" <td>61.0</td>\n",
" <td>GIA</td>\n",
" <td>4.99 x 5.02 x 3.05</td>\n",
" <td>1587</td>\n",
" <td>4.99</td>\n",
" <td>5.02</td>\n",
" <td>3.05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>368160</th>\n",
" <td>1.01</td>\n",
" <td>Good</td>\n",
" <td>H</td>\n",
" <td>VS2</td>\n",
" <td>55.0</td>\n",
" <td>58.3</td>\n",
" <td>GIA</td>\n",
" <td>6.61 x 6.64 x 3.86</td>\n",
" <td>5640</td>\n",
" <td>6.61</td>\n",
" <td>6.64</td>\n",
" <td>3.86</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169005</th>\n",
" <td>0.50</td>\n",
" <td>V.Good</td>\n",
" <td>E</td>\n",
" <td>SI1</td>\n",
" <td>60.0</td>\n",
" <td>63.0</td>\n",
" <td>GIA</td>\n",
" <td>5.01 x 4.99 x 3.15</td>\n",
" <td>1397</td>\n",
" <td>5.01</td>\n",
" <td>4.99</td>\n",
" <td>3.15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>405652</th>\n",
" <td>1.23</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>SI2</td>\n",
" <td>60.0</td>\n",
" <td>60.0</td>\n",
" <td>GIA</td>\n",
" <td>6.95 x 6.99 x 4.18</td>\n",
" <td>7477</td>\n",
" <td>6.95</td>\n",
" <td>6.99</td>\n",
" <td>4.18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3320</th>\n",
" <td>0.23</td>\n",
" <td>V.Good</td>\n",
" <td>H</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>61.9</td>\n",
" <td>GIA</td>\n",
" <td>3.92 x 3.88 x 2.41</td>\n",
" <td>408</td>\n",
" <td>3.92</td>\n",
" <td>3.88</td>\n",
" <td>2.41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38075</th>\n",
" <td>0.30</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>57.0</td>\n",
" <td>61.8</td>\n",
" <td>GIA</td>\n",
" <td>4.30 x 4.32 x 2.66</td>\n",
" <td>648</td>\n",
" <td>4.30</td>\n",
" <td>4.32</td>\n",
" <td>2.66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134916</th>\n",
" <td>0.31</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>IF</td>\n",
" <td>57.5</td>\n",
" <td>0.0</td>\n",
" <td>IGI</td>\n",
" <td>4.33 x 4.36 x 2.67</td>\n",
" <td>1100</td>\n",
" <td>4.33</td>\n",
" <td>4.36</td>\n",
" <td>2.67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>449846</th>\n",
" <td>1.25</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>56.0</td>\n",
" <td>62.2</td>\n",
" <td>GIA</td>\n",
" <td>6.87 x 6.90 x 4.20</td>\n",
" <td>11320</td>\n",
" <td>6.87</td>\n",
" <td>6.90</td>\n",
" <td>4.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>231550</th>\n",
" <td>0.51</td>\n",
" <td>V.Good</td>\n",
" <td>E</td>\n",
" <td>VVS1</td>\n",
" <td>54.0</td>\n",
" <td>64.3</td>\n",
" <td>GIA</td>\n",
" <td>5 x 5.03 x 3.23</td>\n",
" <td>2140</td>\n",
" <td>5.00</td>\n",
" <td>5.03</td>\n",
" <td>3.23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>558801</th>\n",
" <td>2.17</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>SI1</td>\n",
" <td>58.0</td>\n",
" <td>60.7</td>\n",
" <td>GIA</td>\n",
" <td>8.40 x 8.44 x 5.11</td>\n",
" <td>20003</td>\n",
" <td>8.40</td>\n",
" <td>8.44</td>\n",
" <td>5.11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>540067</th>\n",
" <td>1.50</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>VS1</td>\n",
" <td>57.0</td>\n",
" <td>62.4</td>\n",
" <td>GIA</td>\n",
" <td>4.57 x 7.29 x 7.34</td>\n",
" <td>15698</td>\n",
" <td>4.57</td>\n",
" <td>7.29</td>\n",
" <td>7.34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60913</th>\n",
" <td>0.30</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>VS1</td>\n",
" <td>57.0</td>\n",
" <td>63.3</td>\n",
" <td>GIA</td>\n",
" <td>4.24 x 4.25 x 2.69</td>\n",
" <td>740</td>\n",
" <td>4.24</td>\n",
" <td>4.25</td>\n",
" <td>2.69</td>\n",
" </tr>\n",
" <tr>\n",
" <th>411060</th>\n",
" <td>1.00</td>\n",
" <td>V.Good</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>63.0</td>\n",
" <td>61.5</td>\n",
" <td>GIA</td>\n",
" <td>6.33 x 6.38 x 3.91</td>\n",
" <td>7820</td>\n",
" <td>6.33</td>\n",
" <td>6.38</td>\n",
" <td>3.91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93017</th>\n",
" <td>0.51</td>\n",
" <td>Ideal</td>\n",
" <td>K</td>\n",
" <td>VS2</td>\n",
" <td>55.5</td>\n",
" <td>62.0</td>\n",
" <td>IGI</td>\n",
" <td>5.13 x 5.16 x 3.19</td>\n",
" <td>860</td>\n",
" <td>5.13</td>\n",
" <td>5.16</td>\n",
" <td>3.19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>373960</th>\n",
" <td>1.03</td>\n",
" <td>V.Good</td>\n",
" <td>J</td>\n",
" <td>VVS1</td>\n",
" <td>58.0</td>\n",
" <td>61.0</td>\n",
" <td>GIA</td>\n",
" <td>6.50 x 6.52 x 3.97</td>\n",
" <td>5887</td>\n",
" <td>6.50</td>\n",
" <td>6.52</td>\n",
" <td>3.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246337</th>\n",
" <td>0.51</td>\n",
" <td>V.Good</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>56.0</td>\n",
" <td>64.2</td>\n",
" <td>GIA</td>\n",
" <td>5.04 x 5.06 x 3.24</td>\n",
" <td>2330</td>\n",
" <td>5.04</td>\n",
" <td>5.06</td>\n",
" <td>3.24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163788</th>\n",
" <td>0.40</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>61.6</td>\n",
" <td>GIA</td>\n",
" <td>4.70 x 4.72 x 2.90</td>\n",
" <td>1347</td>\n",
" <td>4.70</td>\n",
" <td>4.72</td>\n",
" <td>2.90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>254783</th>\n",
" <td>0.60</td>\n",
" <td>V.Good</td>\n",
" <td>D</td>\n",
" <td>VVS2</td>\n",
" <td>60.0</td>\n",
" <td>65.3</td>\n",
" <td>IGI</td>\n",
" <td>5.19 x 5.24 x 3.41</td>\n",
" <td>2455</td>\n",
" <td>5.19</td>\n",
" <td>5.24</td>\n",
" <td>3.41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>433219</th>\n",
" <td>1.03</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>60.1</td>\n",
" <td>GIA</td>\n",
" <td>6.57 x 6.61 x 3.96</td>\n",
" <td>9544</td>\n",
" <td>6.57</td>\n",
" <td>6.61</td>\n",
" <td>3.96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431363</th>\n",
" <td>1.01</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.0</td>\n",
" <td>61.1</td>\n",
" <td>GIA</td>\n",
" <td>6.43 x 6.47 x 3.94</td>\n",
" <td>9374</td>\n",
" <td>6.43</td>\n",
" <td>6.47</td>\n",
" <td>3.94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>112540</th>\n",
" <td>0.32</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VS1</td>\n",
" <td>57.0</td>\n",
" <td>62.0</td>\n",
" <td>GIA</td>\n",
" <td>4.40 x 4.41 x 2.73</td>\n",
" <td>951</td>\n",
" <td>4.40</td>\n",
" <td>4.41</td>\n",
" <td>2.73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>519667</th>\n",
" <td>2.73</td>\n",
" <td>Ideal</td>\n",
" <td>E</td>\n",
" <td>VS2</td>\n",
" <td>59.0</td>\n",
" <td>60.7</td>\n",
" <td>GIA</td>\n",
" <td>9.03 x 9.08 x 5.50</td>\n",
" <td>59376</td>\n",
" <td>9.03</td>\n",
" <td>9.08</td>\n",
" <td>5.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>441919</th>\n",
" <td>1.61</td>\n",
" <td>V.Good</td>\n",
" <td>I</td>\n",
" <td>VS2</td>\n",
" <td>59.0</td>\n",
" <td>62.8</td>\n",
" <td>GIA</td>\n",
" <td>7.4 x 7.44 x 4.66</td>\n",
" <td>10410</td>\n",
" <td>7.40</td>\n",
" <td>7.44</td>\n",
" <td>4.66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>358518</th>\n",
" <td>1.16</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>I1</td>\n",
" <td>56.0</td>\n",
" <td>63.1</td>\n",
" <td>GIA</td>\n",
" <td>4.22 x 6.68 x 6.7</td>\n",
" <td>5270</td>\n",
" <td>4.22</td>\n",
" <td>6.68</td>\n",
" <td>6.70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>349542</th>\n",
" <td>0.81</td>\n",
" <td>V.Good</td>\n",
" <td>F</td>\n",
" <td>VS1</td>\n",
" <td>59.0</td>\n",
" <td>61.5</td>\n",
" <td>GIA</td>\n",
" <td>6.01 x 6.05 x 3.71</td>\n",
" <td>4960</td>\n",
" <td>6.01</td>\n",
" <td>6.05</td>\n",
" <td>3.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120824</th>\n",
" <td>0.40</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>SI1</td>\n",
" <td>57.0</td>\n",
" <td>61.4</td>\n",
" <td>GIA</td>\n",
" <td>4.79 x 4.76 x 2.93</td>\n",
" <td>1000</td>\n",
" <td>4.79</td>\n",
" <td>4.76</td>\n",
" <td>2.93</td>\n",
" </tr>\n",
" <tr>\n",
" <th>378374</th>\n",
" <td>1.01</td>\n",
" <td>Ideal</td>\n",
" <td>H</td>\n",
" <td>SI1</td>\n",
" <td>61.0</td>\n",
" <td>59.2</td>\n",
" <td>GIA</td>\n",
" <td>6.54 x 6.57 x 3.88</td>\n",
" <td>6078</td>\n",
" <td>6.54</td>\n",
" <td>6.57</td>\n",
" <td>3.88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224521</th>\n",
" <td>0.60</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>SI1</td>\n",
" <td>54.0</td>\n",
" <td>61.8</td>\n",
" <td>GIA</td>\n",
" <td>5.45 x 5.43 x 3.36</td>\n",
" <td>2050</td>\n",
" <td>5.45</td>\n",
" <td>5.43</td>\n",
" <td>3.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108314</th>\n",
" <td>0.50</td>\n",
" <td>Ideal</td>\n",
" <td>D</td>\n",
" <td>SI2</td>\n",
" <td>61.0</td>\n",
" <td>60.7</td>\n",
" <td>EGL USA</td>\n",
" <td>5.08 x 5.1 x 3.09</td>\n",
" <td>930</td>\n",
" <td>5.08</td>\n",
" <td>5.10</td>\n",
" <td>3.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>545633</th>\n",
" <td>1.38</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>VVS2</td>\n",
" <td>58.0</td>\n",
" <td>58.9</td>\n",
" <td>GIA</td>\n",
" <td>7.27 x 7.40 x 4.32</td>\n",
" <td>16730</td>\n",
" <td>7.27</td>\n",
" <td>7.40</td>\n",
" <td>4.32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>275689</th>\n",
" <td>0.70</td>\n",
" <td>Ideal</td>\n",
" <td>F</td>\n",
" <td>VS2</td>\n",
" <td>60.0</td>\n",
" <td>60.5</td>\n",
" <td>GIA</td>\n",
" <td>5.77 x 5.72 x 3.48</td>\n",
" <td>2970</td>\n",
" <td>5.77</td>\n",
" <td>5.72</td>\n",
" <td>3.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>388558</th>\n",
" <td>1.20</td>\n",
" <td>V.Good</td>\n",
" <td>I</td>\n",
" <td>SI2</td>\n",
" <td>59.0</td>\n",
" <td>63.9</td>\n",
" <td>GIA</td>\n",
" <td>6.64 x 6.7 x 4.26</td>\n",
" <td>6550</td>\n",
" <td>6.64</td>\n",
" <td>6.70</td>\n",
" <td>4.26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>474378</th>\n",
" <td>1.62</td>\n",
" <td>V.Good</td>\n",
" <td>G</td>\n",
" <td>VS1</td>\n",
" <td>61.0</td>\n",
" <td>60.3</td>\n",
" <td>GIA</td>\n",
" <td>7.64 x 7.49 x 4.56</td>\n",
" <td>15309</td>\n",
" <td>7.64</td>\n",
" <td>7.49</td>\n",
" <td>4.56</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity table depth cert \\\n",
"305469 0.80 Ideal E SI1 56.0 62.6 GIA \n",
"307801 1.09 V.Good J SI2 57.0 63.3 GIA \n",
"133444 0.50 Ideal D I1 56.0 60.9 GIA \n",
"240781 0.80 Ideal J VVS1 55.5 62.8 IGI \n",
"490954 2.48 V.Good H SI2 62.0 59.4 IGI \n",
"6957 0.30 Good I SI2 62.0 57.2 GIA \n",
"298617 0.80 Ideal I VS1 56.0 62.7 GIA \n",
"399801 1.26 Ideal I VVS1 60.0 59.2 GIA \n",
"527188 1.62 Ideal I IF 54.0 62.3 GIA \n",
"22885 0.51 Ideal J SI2 59.0 65.1 EGL \n",
"451110 2.24 V.Good F SI2 62.0 58.7 EGL Intl. \n",
"545225 1.51 Ideal H VS1 59.0 60.3 GIA \n",
"33912 0.30 V.Good G SI2 57.0 61.3 GIA \n",
"74245 0.33 Good D VS1 63.0 60.2 GIA \n",
"282376 1.20 Good J SI2 55.5 67.5 IGI \n",
"391287 1.09 Ideal G SI1 57.0 62.8 GIA \n",
"249409 0.70 Good I VS1 54.0 64.1 GIA \n",
"558075 3.01 Ideal I SI2 57.0 62.1 IGI \n",
"69016 0.31 V.Good G VS1 60.5 60.1 IGI \n",
"415138 1.20 Ideal I VVS2 58.0 62.7 GIA \n",
"491072 1.72 Ideal G VS1 59.0 61.9 GIA \n",
"596386 3.91 Ideal H VS1 57.0 62.3 HRD \n",
"294857 0.92 Ideal I SI2 58.0 61.0 GIA \n",
"363102 0.90 Ideal E VS2 56.0 61.6 GIA \n",
"563954 1.78 Ideal I VVS1 56.0 62.0 GIA \n",
"30781 0.21 Good F VS1 59.0 63.9 GIA \n",
"186068 0.47 Ideal E VVS2 59.0 61.0 GIA \n",
"368160 1.01 Good H VS2 55.0 58.3 GIA \n",
"169005 0.50 V.Good E SI1 60.0 63.0 GIA \n",
"405652 1.23 Ideal F SI2 60.0 60.0 GIA \n",
"3320 0.23 V.Good H VS1 59.0 61.9 GIA \n",
"38075 0.30 Ideal F VS1 57.0 61.8 GIA \n",
"134916 0.31 Ideal D IF 57.5 0.0 IGI \n",
"449846 1.25 Ideal F VS1 56.0 62.2 GIA \n",
"231550 0.51 V.Good E VVS1 54.0 64.3 GIA \n",
"558801 2.17 Ideal H SI1 58.0 60.7 GIA \n",
"540067 1.50 Ideal G VS1 57.0 62.4 GIA \n",
"60913 0.30 Ideal G VS1 57.0 63.3 GIA \n",
"411060 1.00 V.Good E VS1 63.0 61.5 GIA \n",
"93017 0.51 Ideal K VS2 55.5 62.0 IGI \n",
"373960 1.03 V.Good J VVS1 58.0 61.0 GIA \n",
"246337 0.51 V.Good F VS1 56.0 64.2 GIA \n",
"163788 0.40 Ideal H VS1 59.0 61.6 GIA \n",
"254783 0.60 V.Good D VVS2 60.0 65.3 IGI \n",
"433219 1.03 Ideal D VS1 59.0 60.1 GIA \n",
"431363 1.01 Ideal E VS2 59.0 61.1 GIA \n",
"112540 0.32 Ideal E VS1 57.0 62.0 GIA \n",
"519667 2.73 Ideal E VS2 59.0 60.7 GIA \n",
"441919 1.61 V.Good I VS2 59.0 62.8 GIA \n",
"358518 1.16 Ideal D I1 56.0 63.1 GIA \n",
"349542 0.81 V.Good F VS1 59.0 61.5 GIA \n",
"120824 0.40 Ideal G SI1 57.0 61.4 GIA \n",
"378374 1.01 Ideal H SI1 61.0 59.2 GIA \n",
"224521 0.60 Ideal F SI1 54.0 61.8 GIA \n",
"108314 0.50 Ideal D SI2 61.0 60.7 EGL USA \n",
"545633 1.38 Ideal F VVS2 58.0 58.9 GIA \n",
"275689 0.70 Ideal F VS2 60.0 60.5 GIA \n",
"388558 1.20 V.Good I SI2 59.0 63.9 GIA \n",
"474378 1.62 V.Good G VS1 61.0 60.3 GIA \n",
"\n",
" measurements price x y z \n",
"305469 5.94 x 5.92 x 3.72 3660 5.94 5.92 3.72 \n",
"307801 6.49 x 6.51 x 4.11 3720 6.49 6.51 4.11 \n",
"133444 5.15 x 5.18 x 3.15 1090 5.15 5.18 3.15 \n",
"240781 5.88 x 5.93 x 3.71 2258 5.88 5.93 3.71 \n",
"490954 8.78 x 8.70 x 5.22 19712 8.78 8.70 5.22 \n",
"6957 4.42 x 4.44 x 2.54 463 4.42 4.44 2.54 \n",
"298617 5.88 x 5.93 x 3.70 3486 5.88 5.93 3.70 \n",
"399801 7.03 x 7.07 x 4.18 7142 7.03 7.07 4.18 \n",
"527188 4.69 x 7.51 x 7.56 13730 4.69 7.51 7.56 \n",
"22885 4.96 x 5.00 x 3.24 575 4.96 5.00 3.24 \n",
"451110 8.44 x 8.53 x 4.99 11480 8.44 8.53 4.99 \n",
"545225 7.48 x 7.44 x 4.50 16648 7.48 7.44 4.50 \n",
"33912 4.35 x 4.32 x 2.66 630 4.35 4.32 2.66 \n",
"74245 2.72 x 4.5 x 4.54 788 2.72 4.50 4.54 \n",
"282376 6.52 x 6.5 x 4.39 3110 6.52 6.50 4.39 \n",
"391287 6.56 x 6.59 x 4.13 6683 6.56 6.59 4.13 \n",
"249409 5.60 x 5.56 x 3.58 2374 5.60 5.56 3.58 \n",
"558075 9.24 x 9.21 x 5.73 19790 9.24 9.21 5.73 \n",
"69016 4.30 x 4.37 x 2.60 770 4.30 4.37 2.60 \n",
"415138 6.76 x 6.73 x 4.23 8093 6.76 6.73 4.23 \n",
"491072 7.64 x 7.69 x 4.74 19750 7.64 7.69 4.74 \n",
"596386 9.98 x 10.06 x 6.25 80802 9.98 10.06 6.25 \n",
"294857 6.25 x 6.27 x 3.82 3395 6.25 6.27 3.82 \n",
"363102 6.21 x 6.17 x 0.00 5441 6.21 6.17 NaN \n",
"563954 7.72 x 7.76 x 4.80 21818 7.72 7.76 4.80 \n",
"30781 3.73 x 3.78 x 2.40 615 3.73 3.78 2.40 \n",
"186068 4.99 x 5.02 x 3.05 1587 4.99 5.02 3.05 \n",
"368160 6.61 x 6.64 x 3.86 5640 6.61 6.64 3.86 \n",
"169005 5.01 x 4.99 x 3.15 1397 5.01 4.99 3.15 \n",
"405652 6.95 x 6.99 x 4.18 7477 6.95 6.99 4.18 \n",
"3320 3.92 x 3.88 x 2.41 408 3.92 3.88 2.41 \n",
"38075 4.30 x 4.32 x 2.66 648 4.30 4.32 2.66 \n",
"134916 4.33 x 4.36 x 2.67 1100 4.33 4.36 2.67 \n",
"449846 6.87 x 6.90 x 4.20 11320 6.87 6.90 4.20 \n",
"231550 5 x 5.03 x 3.23 2140 5.00 5.03 3.23 \n",
"558801 8.40 x 8.44 x 5.11 20003 8.40 8.44 5.11 \n",
"540067 4.57 x 7.29 x 7.34 15698 4.57 7.29 7.34 \n",
"60913 4.24 x 4.25 x 2.69 740 4.24 4.25 2.69 \n",
"411060 6.33 x 6.38 x 3.91 7820 6.33 6.38 3.91 \n",
"93017 5.13 x 5.16 x 3.19 860 5.13 5.16 3.19 \n",
"373960 6.50 x 6.52 x 3.97 5887 6.50 6.52 3.97 \n",
"246337 5.04 x 5.06 x 3.24 2330 5.04 5.06 3.24 \n",
"163788 4.70 x 4.72 x 2.90 1347 4.70 4.72 2.90 \n",
"254783 5.19 x 5.24 x 3.41 2455 5.19 5.24 3.41 \n",
"433219 6.57 x 6.61 x 3.96 9544 6.57 6.61 3.96 \n",
"431363 6.43 x 6.47 x 3.94 9374 6.43 6.47 3.94 \n",
"112540 4.40 x 4.41 x 2.73 951 4.40 4.41 2.73 \n",
"519667 9.03 x 9.08 x 5.50 59376 9.03 9.08 5.50 \n",
"441919 7.4 x 7.44 x 4.66 10410 7.40 7.44 4.66 \n",
"358518 4.22 x 6.68 x 6.7 5270 4.22 6.68 6.70 \n",
"349542 6.01 x 6.05 x 3.71 4960 6.01 6.05 3.71 \n",
"120824 4.79 x 4.76 x 2.93 1000 4.79 4.76 2.93 \n",
"378374 6.54 x 6.57 x 3.88 6078 6.54 6.57 3.88 \n",
"224521 5.45 x 5.43 x 3.36 2050 5.45 5.43 3.36 \n",
"108314 5.08 x 5.1 x 3.09 930 5.08 5.10 3.09 \n",
"545633 7.27 x 7.40 x 4.32 16730 7.27 7.40 4.32 \n",
"275689 5.77 x 5.72 x 3.48 2970 5.77 5.72 3.48 \n",
"388558 6.64 x 6.7 x 4.26 6550 6.64 6.70 4.26 \n",
"474378 7.64 x 7.49 x 4.56 15309 7.64 7.49 4.56 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_df"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<bound method Series.describe of 0 0.25\n",
"1 0.23\n",
"2 0.34\n",
"3 0.21\n",
"4 0.31\n",
"5 0.20\n",
"6 0.20\n",
"7 0.22\n",
"8 0.23\n",
"9 0.20\n",
"10 0.24\n",
"11 0.20\n",
"12 0.22\n",
"13 0.23\n",
"14 0.23\n",
"15 0.23\n",
"16 0.20\n",
"17 0.29\n",
"18 0.31\n",
"19 0.21\n",
"20 0.31\n",
"21 0.22\n",
"22 0.21\n",
"23 0.21\n",
"24 0.20\n",
"25 0.20\n",
"26 0.20\n",
"27 0.20\n",
"28 0.23\n",
"29 0.23\n",
" ... \n",
"597994 4.65\n",
"597995 5.75\n",
"597996 3.01\n",
"597997 5.05\n",
"597998 3.54\n",
"597999 2.71\n",
"598000 3.65\n",
"598001 3.01\n",
"598002 3.86\n",
"598003 3.04\n",
"598004 3.04\n",
"598005 3.07\n",
"598006 5.33\n",
"598007 3.56\n",
"598008 3.43\n",
"598009 5.02\n",
"598010 5.01\n",
"598011 3.05\n",
"598012 5.59\n",
"598013 2.57\n",
"598014 5.24\n",
"598015 5.03\n",
"598016 3.05\n",
"598017 3.01\n",
"598018 3.01\n",
"598019 3.02\n",
"598020 5.01\n",
"598021 3.43\n",
"598022 3.01\n",
"598023 4.13\n",
"Name: carat, dtype: float64>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Call describe() so the summary statistics of carat are displayed; without\n",
"# the parentheses the cell only shows the bound-method repr of Series.describe.\n",
"bigdiamonds.carat.describe()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Group the bigdiamonds rows by the value of their `cut` column.\n",
"ajay = bigdiamonds.groupby(by=\"cut\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cut</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Good</th>\n",
" <td>59680</td>\n",
" <td>59680</td>\n",
" <td>59680</td>\n",
" <td>59680</td>\n",
" <td>59680</td>\n",
" <td>59680</td>\n",
" <td>59674</td>\n",
" <td>59415</td>\n",
" <td>59485</td>\n",
" <td>59482</td>\n",
" <td>59495</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ideal</th>\n",
" <td>369448</td>\n",
" <td>369448</td>\n",
" <td>369448</td>\n",
" <td>369448</td>\n",
" <td>369448</td>\n",
" <td>369448</td>\n",
" <td>369427</td>\n",
" <td>369346</td>\n",
" <td>368422</td>\n",
" <td>368400</td>\n",
" <td>367761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>V.Good</th>\n",
" <td>168896</td>\n",
" <td>168896</td>\n",
" <td>168896</td>\n",
" <td>168896</td>\n",
" <td>168896</td>\n",
" <td>168896</td>\n",
" <td>168877</td>\n",
" <td>168550</td>\n",
" <td>168302</td>\n",
" <td>168290</td>\n",
" <td>168224</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat color clarity table depth cert measurements price \\\n",
"cut \n",
"Good 59680 59680 59680 59680 59680 59680 59674 59415 \n",
"Ideal 369448 369448 369448 369448 369448 369448 369427 369346 \n",
"V.Good 168896 168896 168896 168896 168896 168896 168877 168550 \n",
"\n",
" x y z \n",
"cut \n",
"Good 59485 59482 59495 \n",
"Ideal 368422 368400 367761 \n",
"V.Good 168302 168290 168224 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of non-null entries in every column, one row per group in `ajay`.\n",
"ajay.count()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cut</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Good</th>\n",
" <td>0.900303</td>\n",
" <td>58.869811</td>\n",
" <td>62.324992</td>\n",
" <td>5254.791551</td>\n",
" <td>5.683471</td>\n",
" <td>5.816651</td>\n",
" <td>3.780346</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ideal</th>\n",
" <td>1.120194</td>\n",
" <td>57.332537</td>\n",
" <td>61.008909</td>\n",
" <td>9919.276732</td>\n",
" <td>6.028667</td>\n",
" <td>6.304883</td>\n",
" <td>4.154810</td>\n",
" </tr>\n",
" <tr>\n",
" <th>V.Good</th>\n",
" <td>1.024760</td>\n",
" <td>57.846400</td>\n",
" <td>60.737810</td>\n",
" <td>7430.526817</td>\n",
" <td>6.016426</td>\n",
" <td>6.101187</td>\n",
" <td>3.857584</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat table depth price x y \\\n",
"cut \n",
"Good 0.900303 58.869811 62.324992 5254.791551 5.683471 5.816651 \n",
"Ideal 1.120194 57.332537 61.008909 9919.276732 6.028667 6.304883 \n",
"V.Good 1.024760 57.846400 60.737810 7430.526817 6.016426 6.101187 \n",
"\n",
" z \n",
"cut \n",
"Good 3.780346 \n",
"Ideal 4.154810 \n",
"V.Good 3.857584 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Average of each numeric column per group. The numeric columns are selected\n",
"# explicitly because newer pandas versions raise on the non-numeric columns\n",
"# (color, clarity, cert, measurements) instead of silently dropping them.\n",
"ajay[[\"carat\", \"table\", \"depth\", \"price\", \"x\", \"y\", \"z\"]].mean()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Regroup by `color`. NOTE: this rebinds `ajay`, which earlier held the\n",
"# grouping by `cut`, so cells that use `ajay` depend on execution order.\n",
"ajay = bigdiamonds.groupby(by=\"color\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>depth</th>\n",
" <th>price</th>\n",
" <th>table</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>color</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">D</th>\n",
" <th>count</th>\n",
" <td>73630.000000</td>\n",
" <td>73630.000000</td>\n",
" <td>73563.000000</td>\n",
" <td>73630.000000</td>\n",
" <td>73446.000000</td>\n",
" <td>73444.000000</td>\n",
" <td>73366.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.826618</td>\n",
" <td>61.152602</td>\n",
" <td>8266.345758</td>\n",
" <td>57.641772</td>\n",
" <td>5.554897</td>\n",
" <td>5.723287</td>\n",
" <td>3.700009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.593038</td>\n",
" <td>7.032503</td>\n",
" <td>12856.623236</td>\n",
" <td>4.453634</td>\n",
" <td>1.337612</td>\n",
" <td>1.283894</td>\n",
" <td>1.059891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>301.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.450000</td>\n",
" <td>2.600000</td>\n",
" <td>0.670000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.380000</td>\n",
" <td>60.900000</td>\n",
" <td>1050.000000</td>\n",
" <td>56.000000</td>\n",
" <td>4.470000</td>\n",
" <td>4.620000</td>\n",
" <td>2.890000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.630000</td>\n",
" <td>62.000000</td>\n",
" <td>2690.000000</td>\n",
" <td>58.000000</td>\n",
" <td>5.270000</td>\n",
" <td>5.500000</td>\n",
" <td>3.510000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.070000</td>\n",
" <td>62.700000</td>\n",
" <td>10613.500000</td>\n",
" <td>59.000000</td>\n",
" <td>6.480000</td>\n",
" <td>6.580000</td>\n",
" <td>4.220000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>8.080000</td>\n",
" <td>80.000000</td>\n",
" <td>99920.000000</td>\n",
" <td>72.000000</td>\n",
" <td>12.670000</td>\n",
" <td>12.760000</td>\n",
" <td>11.020000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">E</th>\n",
" <th>count</th>\n",
" <td>93483.000000</td>\n",
" <td>93483.000000</td>\n",
" <td>93374.000000</td>\n",
" <td>93483.000000</td>\n",
" <td>93185.000000</td>\n",
" <td>93187.000000</td>\n",
" <td>93135.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.831882</td>\n",
" <td>61.210083</td>\n",
" <td>7282.990286</td>\n",
" <td>57.591876</td>\n",
" <td>5.557539</td>\n",
" <td>5.722956</td>\n",
" <td>3.698196</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.617294</td>\n",
" <td>6.973828</td>\n",
" <td>11868.169639</td>\n",
" <td>5.022670</td>\n",
" <td>1.344313</td>\n",
" <td>1.306725</td>\n",
" <td>1.081532</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>301.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.330000</td>\n",
" <td>2.770000</td>\n",
" <td>2.100000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.380000</td>\n",
" <td>61.000000</td>\n",
" <td>960.000000</td>\n",
" <td>56.000000</td>\n",
" <td>4.470000</td>\n",
" <td>4.620000</td>\n",
" <td>2.880000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.640000</td>\n",
" <td>62.000000</td>\n",
" <td>2342.000000</td>\n",
" <td>58.000000</td>\n",
" <td>5.260000</td>\n",
" <td>5.510000</td>\n",
" <td>3.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.030000</td>\n",
" <td>62.700000</td>\n",
" <td>8247.000000</td>\n",
" <td>59.000000</td>\n",
" <td>6.420000</td>\n",
" <td>6.530000</td>\n",
" <td>4.170000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>7.710000</td>\n",
" <td>81.300000</td>\n",
" <td>99966.000000</td>\n",
" <td>71.000000</td>\n",
" <td>12.690000</td>\n",
" <td>12.690000</td>\n",
" <td>12.280000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">F</th>\n",
" <th>count</th>\n",
" <td>93573.000000</td>\n",
" <td>93573.000000</td>\n",
" <td>93452.000000</td>\n",
" <td>93573.000000</td>\n",
" <td>93161.000000</td>\n",
" <td>93159.000000</td>\n",
" <td>93134.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.941053</td>\n",
" <td>61.219863</td>\n",
" <td>8234.729744</td>\n",
" <td>57.568689</td>\n",
" <td>5.768627</td>\n",
" <td>5.950273</td>\n",
" <td>3.854868</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.697010</td>\n",
" <td>6.988582</td>\n",
" <td>12707.398437</td>\n",
" <td>5.169453</td>\n",
" <td>1.421849</td>\n",
" <td>1.391484</td>\n",
" <td>1.163764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>300.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.150000</td>\n",
" <td>2.610000</td>\n",
" <td>2.080000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.400000</td>\n",
" <td>61.000000</td>\n",
" <td>1058.000000</td>\n",
" <td>56.000000</td>\n",
" <td>4.610000</td>\n",
" <td>4.740000</td>\n",
" <td>2.950000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.710000</td>\n",
" <td>62.000000</td>\n",
" <td>2966.000000</td>\n",
" <td>58.000000</td>\n",
" <td>5.590000</td>\n",
" <td>5.740000</td>\n",
" <td>3.630000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.230000</td>\n",
" <td>62.700000</td>\n",
" <td>10222.000000</td>\n",
" <td>59.000000</td>\n",
" <td>6.660000</td>\n",
" <td>6.890000</td>\n",
" <td>4.450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>7.060000</td>\n",
" <td>73.600000</td>\n",
" <td>99960.000000</td>\n",
" <td>75.000000</td>\n",
" <td>12.570000</td>\n",
" <td>12.570000</td>\n",
" <td>12.050000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">G</th>\n",
" <th>count</th>\n",
" <td>96204.000000</td>\n",
" <td>96204.000000</td>\n",
" <td>96053.000000</td>\n",
" <td>96204.000000</td>\n",
" <td>95851.000000</td>\n",
" <td>95851.000000</td>\n",
" <td>95703.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.063841</td>\n",
" <td>61.142177</td>\n",
" <td>8984.200296</td>\n",
" <td>57.527934</td>\n",
" <td>5.995455</td>\n",
" <td>6.206307</td>\n",
" <td>4.041977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.766115</td>\n",
" <td>7.296149</td>\n",
" <td>13193.848591</td>\n",
" <td>5.331322</td>\n",
" <td>1.483286</td>\n",
" <td>1.438840</td>\n",
" <td>1.228322</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>300.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.690000</td>\n",
" <td>3.210000</td>\n",
" <td>1.510000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.500000</td>\n",
" <td>61.000000</td>\n",
" <td>1270.000000</td>\n",
" <td>56.000000</td>\n",
" <td>4.750000</td>\n",
" <td>5.000000</td>\n",
" <td>3.130000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.900000</td>\n",
" <td>62.100000</td>\n",
" <td>3720.000000</td>\n",
" <td>58.000000</td>\n",
" <td>5.800000</td>\n",
" <td>6.100000</td>\n",
" <td>3.880000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">I</th>\n",
" <th>std</th>\n",
" <td>0.892093</td>\n",
" <td>8.392595</td>\n",
" <td>13093.658702</td>\n",
" <td>5.114707</td>\n",
" <td>1.580171</td>\n",
" <td>1.504178</td>\n",
" <td>1.285164</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>301.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.760000</td>\n",
" <td>1.000000</td>\n",
" <td>0.040000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.640000</td>\n",
" <td>61.000000</td>\n",
" <td>1692.000000</td>\n",
" <td>56.500000</td>\n",
" <td>5.120000</td>\n",
" <td>5.500000</td>\n",
" <td>3.450000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.010000</td>\n",
" <td>62.100000</td>\n",
" <td>4717.000000</td>\n",
" <td>58.000000</td>\n",
" <td>6.230000</td>\n",
" <td>6.430000</td>\n",
" <td>4.030000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.730000</td>\n",
" <td>62.800000</td>\n",
" <td>13257.000000</td>\n",
" <td>59.000000</td>\n",
" <td>7.480000</td>\n",
" <td>7.710000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>7.530000</td>\n",
" <td>76.300000</td>\n",
" <td>99942.000000</td>\n",
" <td>75.900000</td>\n",
" <td>12.910000</td>\n",
" <td>12.790000</td>\n",
" <td>12.910000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">J</th>\n",
" <th>count</th>\n",
" <td>48709.000000</td>\n",
" <td>48709.000000</td>\n",
" <td>48645.000000</td>\n",
" <td>48709.000000</td>\n",
" <td>48630.000000</td>\n",
" <td>48626.000000</td>\n",
" <td>48581.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.347540</td>\n",
" <td>61.008290</td>\n",
" <td>9423.581170</td>\n",
" <td>57.851828</td>\n",
" <td>6.488372</td>\n",
" <td>6.715322</td>\n",
" <td>4.371004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.980459</td>\n",
" <td>7.919312</td>\n",
" <td>13431.895319</td>\n",
" <td>4.175835</td>\n",
" <td>1.632161</td>\n",
" <td>1.555169</td>\n",
" <td>1.283817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>300.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.700000</td>\n",
" <td>61.000000</td>\n",
" <td>1575.000000</td>\n",
" <td>56.500000</td>\n",
" <td>5.320000</td>\n",
" <td>5.610000</td>\n",
" <td>3.530000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.020000</td>\n",
" <td>62.100000</td>\n",
" <td>4697.000000</td>\n",
" <td>58.000000</td>\n",
" <td>6.340000</td>\n",
" <td>6.490000</td>\n",
" <td>4.050000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.760000</td>\n",
" <td>62.800000</td>\n",
" <td>11352.000000</td>\n",
" <td>59.000000</td>\n",
" <td>7.490000</td>\n",
" <td>7.760000</td>\n",
" <td>5.030000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>7.250000</td>\n",
" <td>76.500000</td>\n",
" <td>99806.000000</td>\n",
" <td>74.000000</td>\n",
" <td>12.660000</td>\n",
" <td>12.550000</td>\n",
" <td>12.620000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">K</th>\n",
" <th>count</th>\n",
" <td>25868.000000</td>\n",
" <td>25868.000000</td>\n",
" <td>25807.000000</td>\n",
" <td>25868.000000</td>\n",
" <td>25815.000000</td>\n",
" <td>25812.000000</td>\n",
" <td>25743.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.495065</td>\n",
" <td>60.612792</td>\n",
" <td>9694.256675</td>\n",
" <td>57.852439</td>\n",
" <td>6.674731</td>\n",
" <td>6.941670</td>\n",
" <td>4.554655</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.065830</td>\n",
" <td>9.452764</td>\n",
" <td>13241.275870</td>\n",
" <td>4.324384</td>\n",
" <td>1.721205</td>\n",
" <td>1.638528</td>\n",
" <td>1.389201</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>300.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.660000</td>\n",
" <td>1.000000</td>\n",
" <td>0.340000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.710000</td>\n",
" <td>61.000000</td>\n",
" <td>1429.000000</td>\n",
" <td>56.000000</td>\n",
" <td>5.390000</td>\n",
" <td>5.690000</td>\n",
" <td>3.570000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.110000</td>\n",
" <td>62.100000</td>\n",
" <td>4418.000000</td>\n",
" <td>58.000000</td>\n",
" <td>6.420000</td>\n",
" <td>6.650000</td>\n",
" <td>4.220000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2.040000</td>\n",
" <td>62.900000</td>\n",
" <td>13702.500000</td>\n",
" <td>59.000000</td>\n",
" <td>8.050000</td>\n",
" <td>8.170000</td>\n",
" <td>5.230000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>8.900000</td>\n",
" <td>74.800000</td>\n",
" <td>99690.000000</td>\n",
" <td>71.000000</td>\n",
" <td>13.890000</td>\n",
" <td>13.890000</td>\n",
" <td>13.180000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">L</th>\n",
" <th>count</th>\n",
" <td>9656.000000</td>\n",
" <td>9656.000000</td>\n",
" <td>9649.000000</td>\n",
" <td>9656.000000</td>\n",
" <td>9643.000000</td>\n",
" <td>9643.000000</td>\n",
" <td>9632.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.363271</td>\n",
" <td>60.055976</td>\n",
" <td>7109.228314</td>\n",
" <td>58.116611</td>\n",
" <td>6.463055</td>\n",
" <td>6.696378</td>\n",
" <td>4.376560</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.078395</td>\n",
" <td>10.963340</td>\n",
" <td>11067.577753</td>\n",
" <td>4.192135</td>\n",
" <td>1.700404</td>\n",
" <td>1.634347</td>\n",
" <td>1.353431</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.200000</td>\n",
" <td>0.000000</td>\n",
" <td>305.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.460000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.630000</td>\n",
" <td>60.700000</td>\n",
" <td>1100.000000</td>\n",
" <td>57.000000</td>\n",
" <td>5.200000</td>\n",
" <td>5.485000</td>\n",
" <td>3.430000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.010000</td>\n",
" <td>62.100000</td>\n",
" <td>3017.000000</td>\n",
" <td>58.000000</td>\n",
" <td>6.150000</td>\n",
" <td>6.380000</td>\n",
" <td>4.030000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.700000</td>\n",
" <td>63.000000</td>\n",
" <td>7361.000000</td>\n",
" <td>60.000000</td>\n",
" <td>7.450000</td>\n",
" <td>7.650000</td>\n",
" <td>5.050000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>9.250000</td>\n",
" <td>74.900000</td>\n",
" <td>95814.000000</td>\n",
" <td>71.000000</td>\n",
" <td>13.240000</td>\n",
" <td>13.330000</td>\n",
" <td>12.620000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>72 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" carat depth price table \\\n",
"color \n",
"D count 73630.000000 73630.000000 73563.000000 73630.000000 \n",
" mean 0.826618 61.152602 8266.345758 57.641772 \n",
" std 0.593038 7.032503 12856.623236 4.453634 \n",
" min 0.200000 0.000000 301.000000 0.000000 \n",
" 25% 0.380000 60.900000 1050.000000 56.000000 \n",
" 50% 0.630000 62.000000 2690.000000 58.000000 \n",
" 75% 1.070000 62.700000 10613.500000 59.000000 \n",
" max 8.080000 80.000000 99920.000000 72.000000 \n",
"E count 93483.000000 93483.000000 93374.000000 93483.000000 \n",
" mean 0.831882 61.210083 7282.990286 57.591876 \n",
" std 0.617294 6.973828 11868.169639 5.022670 \n",
" min 0.200000 0.000000 301.000000 0.000000 \n",
" 25% 0.380000 61.000000 960.000000 56.000000 \n",
" 50% 0.640000 62.000000 2342.000000 58.000000 \n",
" 75% 1.030000 62.700000 8247.000000 59.000000 \n",
" max 7.710000 81.300000 99966.000000 71.000000 \n",
"F count 93573.000000 93573.000000 93452.000000 93573.000000 \n",
" mean 0.941053 61.219863 8234.729744 57.568689 \n",
" std 0.697010 6.988582 12707.398437 5.169453 \n",
" min 0.200000 0.000000 300.000000 0.000000 \n",
" 25% 0.400000 61.000000 1058.000000 56.000000 \n",
" 50% 0.710000 62.000000 2966.000000 58.000000 \n",
" 75% 1.230000 62.700000 10222.000000 59.000000 \n",
" max 7.060000 73.600000 99960.000000 75.000000 \n",
"G count 96204.000000 96204.000000 96053.000000 96204.000000 \n",
" mean 1.063841 61.142177 8984.200296 57.527934 \n",
" std 0.766115 7.296149 13193.848591 5.331322 \n",
" min 0.200000 0.000000 300.000000 0.000000 \n",
" 25% 0.500000 61.000000 1270.000000 56.000000 \n",
" 50% 0.900000 62.100000 3720.000000 58.000000 \n",
"... ... ... ... ... \n",
"I std 0.892093 8.392595 13093.658702 5.114707 \n",
" min 0.200000 0.000000 301.000000 0.000000 \n",
" 25% 0.640000 61.000000 1692.000000 56.500000 \n",
" 50% 1.010000 62.100000 4717.000000 58.000000 \n",
" 75% 1.730000 62.800000 13257.000000 59.000000 \n",
" max 7.530000 76.300000 99942.000000 75.900000 \n",
"J count 48709.000000 48709.000000 48645.000000 48709.000000 \n",
" mean 1.347540 61.008290 9423.581170 57.851828 \n",
" std 0.980459 7.919312 13431.895319 4.175835 \n",
" min 0.200000 0.000000 300.000000 0.000000 \n",
" 25% 0.700000 61.000000 1575.000000 56.500000 \n",
" 50% 1.020000 62.100000 4697.000000 58.000000 \n",
" 75% 1.760000 62.800000 11352.000000 59.000000 \n",
" max 7.250000 76.500000 99806.000000 74.000000 \n",
"K count 25868.000000 25868.000000 25807.000000 25868.000000 \n",
" mean 1.495065 60.612792 9694.256675 57.852439 \n",
" std 1.065830 9.452764 13241.275870 4.324384 \n",
" min 0.200000 0.000000 300.000000 0.000000 \n",
" 25% 0.710000 61.000000 1429.000000 56.000000 \n",
" 50% 1.110000 62.100000 4418.000000 58.000000 \n",
" 75% 2.040000 62.900000 13702.500000 59.000000 \n",
" max 8.900000 74.800000 99690.000000 71.000000 \n",
"L count 9656.000000 9656.000000 9649.000000 9656.000000 \n",
" mean 1.363271 60.055976 7109.228314 58.116611 \n",
" std 1.078395 10.963340 11067.577753 4.192135 \n",
" min 0.200000 0.000000 305.000000 0.000000 \n",
" 25% 0.630000 60.700000 1100.000000 57.000000 \n",
" 50% 1.010000 62.100000 3017.000000 58.000000 \n",
" 75% 1.700000 63.000000 7361.000000 60.000000 \n",
" max 9.250000 74.900000 95814.000000 71.000000 \n",
"\n",
" x y z \n",
"color \n",
"D count 73446.000000 73444.000000 73366.000000 \n",
" mean 5.554897 5.723287 3.700009 \n",
" std 1.337612 1.283894 1.059891 \n",
" min 0.450000 2.600000 0.670000 \n",
" 25% 4.470000 4.620000 2.890000 \n",
" 50% 5.270000 5.500000 3.510000 \n",
" 75% 6.480000 6.580000 4.220000 \n",
" max 12.670000 12.760000 11.020000 \n",
"E count 93185.000000 93187.000000 93135.000000 \n",
" mean 5.557539 5.722956 3.698196 \n",
" std 1.344313 1.306725 1.081532 \n",
" min 0.330000 2.770000 2.100000 \n",
" 25% 4.470000 4.620000 2.880000 \n",
" 50% 5.260000 5.510000 3.500000 \n",
" 75% 6.420000 6.530000 4.170000 \n",
" max 12.690000 12.690000 12.280000 \n",
"F count 93161.000000 93159.000000 93134.000000 \n",
" mean 5.768627 5.950273 3.854868 \n",
" std 1.421849 1.391484 1.163764 \n",
" min 0.150000 2.610000 2.080000 \n",
" 25% 4.610000 4.740000 2.950000 \n",
" 50% 5.590000 5.740000 3.630000 \n",
" 75% 6.660000 6.890000 4.450000 \n",
" max 12.570000 12.570000 12.050000 \n",
"G count 95851.000000 95851.000000 95703.000000 \n",
" mean 5.995455 6.206307 4.041977 \n",
" std 1.483286 1.438840 1.228322 \n",
" min 0.690000 3.210000 1.510000 \n",
" 25% 4.750000 5.000000 3.130000 \n",
" 50% 5.800000 6.100000 3.880000 \n",
"... ... ... ... \n",
"I std 1.580171 1.504178 1.285164 \n",
" min 0.760000 1.000000 0.040000 \n",
" 25% 5.120000 5.500000 3.450000 \n",
" 50% 6.230000 6.430000 4.030000 \n",
" 75% 7.480000 7.710000 5.000000 \n",
" max 12.910000 12.790000 12.910000 \n",
"J count 48630.000000 48626.000000 48581.000000 \n",
" mean 6.488372 6.715322 4.371004 \n",
" std 1.632161 1.555169 1.283817 \n",
" min 1.000000 1.000000 1.000000 \n",
" 25% 5.320000 5.610000 3.530000 \n",
" 50% 6.340000 6.490000 4.050000 \n",
" 75% 7.490000 7.760000 5.030000 \n",
" max 12.660000 12.550000 12.620000 \n",
"K count 25815.000000 25812.000000 25743.000000 \n",
" mean 6.674731 6.941670 4.554655 \n",
" std 1.721205 1.638528 1.389201 \n",
" min 0.660000 1.000000 0.340000 \n",
" 25% 5.390000 5.690000 3.570000 \n",
" 50% 6.420000 6.650000 4.220000 \n",
" 75% 8.050000 8.170000 5.230000 \n",
" max 13.890000 13.890000 13.180000 \n",
"L count 9643.000000 9643.000000 9632.000000 \n",
" mean 6.463055 6.696378 4.376560 \n",
" std 1.700404 1.634347 1.353431 \n",
" min 1.000000 1.000000 0.460000 \n",
" 25% 5.200000 5.485000 3.430000 \n",
" 50% 6.150000 6.380000 4.030000 \n",
" 75% 7.450000 7.650000 5.050000 \n",
" max 13.240000 13.330000 12.620000 \n",
"\n",
"[72 rows x 7 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ajay.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" </tr>\n",
" <tr>\n",
" <th>clarity</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>I1</th>\n",
" <td>14524</td>\n",
" <td>14524</td>\n",
" <td>14524</td>\n",
" <td>14524</td>\n",
" <td>14524</td>\n",
" <td>14524</td>\n",
" <td>14521</td>\n",
" <td>14355</td>\n",
" <td>14472</td>\n",
" <td>14469</td>\n",
" <td>14439</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I2</th>\n",
" <td>2302</td>\n",
" <td>2302</td>\n",
" <td>2302</td>\n",
" <td>2302</td>\n",
" <td>2302</td>\n",
" <td>2302</td>\n",
" <td>2302</td>\n",
" <td>2284</td>\n",
" <td>2293</td>\n",
" <td>2293</td>\n",
" <td>2294</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IF</th>\n",
" <td>31157</td>\n",
" <td>31157</td>\n",
" <td>31157</td>\n",
" <td>31157</td>\n",
" <td>31157</td>\n",
" <td>31157</td>\n",
" <td>31157</td>\n",
" <td>31156</td>\n",
" <td>31107</td>\n",
" <td>31107</td>\n",
" <td>30905</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SI1</th>\n",
" <td>116631</td>\n",
" <td>116631</td>\n",
" <td>116631</td>\n",
" <td>116631</td>\n",
" <td>116631</td>\n",
" <td>116631</td>\n",
" <td>116616</td>\n",
" <td>116468</td>\n",
" <td>116271</td>\n",
" <td>116264</td>\n",
" <td>116247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SI2</th>\n",
" <td>104300</td>\n",
" <td>104300</td>\n",
" <td>104300</td>\n",
" <td>104300</td>\n",
" <td>104300</td>\n",
" <td>104300</td>\n",
" <td>104295</td>\n",
" <td>104104</td>\n",
" <td>104001</td>\n",
" <td>103996</td>\n",
" <td>104001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>VS1</th>\n",
" <td>97730</td>\n",
" <td>97730</td>\n",
" <td>97730</td>\n",
" <td>97730</td>\n",
" <td>97730</td>\n",
" <td>97730</td>\n",
" <td>97723</td>\n",
" <td>97677</td>\n",
" <td>97451</td>\n",
" <td>97450</td>\n",
" <td>97333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>VS2</th>\n",
" <td>111082</td>\n",
" <td>111082</td>\n",
" <td>111082</td>\n",
" <td>111082</td>\n",
" <td>111082</td>\n",
" <td>111082</td>\n",
" <td>111073</td>\n",
" <td>110997</td>\n",
" <td>110752</td>\n",
" <td>110751</td>\n",
" <td>110675</td>\n",
" </tr>\n",
" <tr>\n",
" <th>VVS1</th>\n",
" <td>54798</td>\n",
" <td>54798</td>\n",
" <td>54798</td>\n",
" <td>54798</td>\n",
" <td>54798</td>\n",
" <td>54798</td>\n",
" <td>54798</td>\n",
" <td>54790</td>\n",
" <td>54583</td>\n",
" <td>54586</td>\n",
" <td>54423</td>\n",
" </tr>\n",
" <tr>\n",
" <th>VVS2</th>\n",
" <td>65500</td>\n",
" <td>65500</td>\n",
" <td>65500</td>\n",
" <td>65500</td>\n",
" <td>65500</td>\n",
" <td>65500</td>\n",
" <td>65493</td>\n",
" <td>65480</td>\n",
" <td>65279</td>\n",
" <td>65256</td>\n",
" <td>65163</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color table depth cert measurements price \\\n",
"clarity \n",
"I1 14524 14524 14524 14524 14524 14524 14521 14355 \n",
"I2 2302 2302 2302 2302 2302 2302 2302 2284 \n",
"IF 31157 31157 31157 31157 31157 31157 31157 31156 \n",
"SI1 116631 116631 116631 116631 116631 116631 116616 116468 \n",
"SI2 104300 104300 104300 104300 104300 104300 104295 104104 \n",
"VS1 97730 97730 97730 97730 97730 97730 97723 97677 \n",
"VS2 111082 111082 111082 111082 111082 111082 111073 110997 \n",
"VVS1 54798 54798 54798 54798 54798 54798 54798 54790 \n",
"VVS2 65500 65500 65500 65500 65500 65500 65493 65480 \n",
"\n",
" x y z \n",
"clarity \n",
"I1 14472 14469 14439 \n",
"I2 2293 2293 2294 \n",
"IF 31107 31107 30905 \n",
"SI1 116271 116264 116247 \n",
"SI2 104001 103996 104001 \n",
"VS1 97451 97450 97333 \n",
"VS2 110752 110751 110675 \n",
"VVS1 54583 54586 54423 \n",
"VVS2 65279 65256 65163 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bucket the diamonds by clarity grade; the GroupBy object is reused by later cells.\n",
"clarity = bigdiamonds.groupby(by=\"clarity\")\n",
"# Number of non-null entries in every column, per clarity grade.\n",
"clarity.count()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"clarity\n",
"I1 14355\n",
"I2 2284\n",
"IF 31156\n",
"SI1 116468\n",
"SI2 104104\n",
"VS1 97677\n",
"VS2 110997\n",
"VVS1 54790\n",
"VVS2 65480\n",
"Name: price, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Select the price column before aggregating so only that column is counted,\n",
"# instead of counting all columns and discarding the rest.\n",
"clarity[\"price\"].count()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"clarity\n",
"I1 2465.762382\n",
"I2 2611.824869\n",
"IF 11559.865419\n",
"SI1 7909.712840\n",
"SI2 6986.413317\n",
"VS1 10270.489491\n",
"VS2 9808.277449\n",
"VVS1 8467.331922\n",
"VVS2 9505.287538\n",
"Name: price, dtype: float64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Average price per clarity grade. Aggregate the price column only: a mean over the\n",
"# whole frame also drags in text columns (cut, color, ...), which newer pandas rejects.\n",
"clarity[\"price\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"clarity\n",
"I1 1307.0\n",
"I2 1611.0\n",
"IF 4483.0\n",
"SI1 3770.0\n",
"SI2 3532.5\n",
"VS1 4000.0\n",
"VS2 4050.0\n",
"VVS1 2865.5\n",
"VVS2 3112.0\n",
"Name: price, dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Median price per clarity grade. Aggregate the price column only: a median over the\n",
"# whole frame also drags in text columns (cut, color, ...), which newer pandas rejects.\n",
"clarity[\"price\"].median()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"clarity\n",
"I1 51870\n",
"I2 39379\n",
"IF 99990\n",
"SI1 99913\n",
"SI2 99778\n",
"VS1 99966\n",
"VS2 99960\n",
"VVS1 99930\n",
"VVS2 99942\n",
"Name: price, dtype: float64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Highest price per clarity grade, computed on the price column alone rather than\n",
"# taking the maximum of every column and then discarding all but one.\n",
"clarity[\"price\"].max()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import matplotlib.pyplot as mt"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pylab as pl"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f00c473df10>]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Line plot of price against carat, with points connected in row order.\n",
"pl.plot(bigdiamonds[\"carat\"], bigdiamonds[\"price\"])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pl.show()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7f00ba098110>"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Scatter plot of price against carat, one marker per diamond.\n",
"pl.scatter(bigdiamonds[\"carat\"], bigdiamonds[\"price\"])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pl.show()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Price per carat, stored as a new column on the frame.\n",
"bigdiamonds[\"newdata\"] = bigdiamonds[\"price\"].div(bigdiamonds[\"carat\"])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Keep every row but only the carat and cut columns.\n",
"newdiamonds = bigdiamonds.loc[:, [\"carat\", \"cut\"]]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.25</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.34</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.21</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.22</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.23</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.24</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.20</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.22</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.23</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.23</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.29</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.21</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.22</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.21</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.21</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.20</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.20</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.23</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597994</th>\n",
" <td>4.65</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597995</th>\n",
" <td>5.75</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597996</th>\n",
" <td>3.01</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597997</th>\n",
" <td>5.05</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597998</th>\n",
" <td>3.54</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>597999</th>\n",
" <td>2.71</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598000</th>\n",
" <td>3.65</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598001</th>\n",
" <td>3.01</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598002</th>\n",
" <td>3.86</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598003</th>\n",
" <td>3.04</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598004</th>\n",
" <td>3.04</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598005</th>\n",
" <td>3.07</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598006</th>\n",
" <td>5.33</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598007</th>\n",
" <td>3.56</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598008</th>\n",
" <td>3.43</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598009</th>\n",
" <td>5.02</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598010</th>\n",
" <td>5.01</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598011</th>\n",
" <td>3.05</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598012</th>\n",
" <td>5.59</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598013</th>\n",
" <td>2.57</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598014</th>\n",
" <td>5.24</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598015</th>\n",
" <td>5.03</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598016</th>\n",
" <td>3.05</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598017</th>\n",
" <td>3.01</td>\n",
" <td>Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598018</th>\n",
" <td>3.01</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598019</th>\n",
" <td>3.02</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598020</th>\n",
" <td>5.01</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598021</th>\n",
" <td>3.43</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598022</th>\n",
" <td>3.01</td>\n",
" <td>V.Good</td>\n",
" </tr>\n",
" <tr>\n",
" <th>598023</th>\n",
" <td>4.13</td>\n",
" <td>Ideal</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>598024 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" carat cut\n",
"0 0.25 V.Good\n",
"1 0.23 Good\n",
"2 0.34 Good\n",
"3 0.21 V.Good\n",
"4 0.31 V.Good\n",
"5 0.20 Good\n",
"6 0.20 Good\n",
"7 0.22 V.Good\n",
"8 0.23 V.Good\n",
"9 0.20 Good\n",
"10 0.24 V.Good\n",
"11 0.20 V.Good\n",
"12 0.22 V.Good\n",
"13 0.23 Good\n",
"14 0.23 V.Good\n",
"15 0.23 V.Good\n",
"16 0.20 Good\n",
"17 0.29 V.Good\n",
"18 0.31 V.Good\n",
"19 0.21 Good\n",
"20 0.31 V.Good\n",
"21 0.22 Good\n",
"22 0.21 Ideal\n",
"23 0.21 Good\n",
"24 0.20 V.Good\n",
"25 0.20 Good\n",
"26 0.20 Good\n",
"27 0.20 V.Good\n",
"28 0.23 Ideal\n",
"29 0.23 Good\n",
"... ... ...\n",
"597994 4.65 Ideal\n",
"597995 5.75 Ideal\n",
"597996 3.01 Good\n",
"597997 5.05 V.Good\n",
"597998 3.54 Ideal\n",
"597999 2.71 Ideal\n",
"598000 3.65 Ideal\n",
"598001 3.01 V.Good\n",
"598002 3.86 Ideal\n",
"598003 3.04 Ideal\n",
"598004 3.04 Ideal\n",
"598005 3.07 Ideal\n",
"598006 5.33 Ideal\n",
"598007 3.56 Ideal\n",
"598008 3.43 Ideal\n",
"598009 5.02 Good\n",
"598010 5.01 V.Good\n",
"598011 3.05 V.Good\n",
"598012 5.59 Ideal\n",
"598013 2.57 Ideal\n",
"598014 5.24 Ideal\n",
"598015 5.03 Ideal\n",
"598016 3.05 Ideal\n",
"598017 3.01 Good\n",
"598018 3.01 Ideal\n",
"598019 3.02 Ideal\n",
"598020 5.01 V.Good\n",
"598021 3.43 Ideal\n",
"598022 3.01 V.Good\n",
"598023 4.13 Ideal\n",
"\n",
"[598024 rows x 2 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows only; displaying the full frame adds a long table to the notebook.\n",
"newdiamonds.head()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Called without column names, the old DataFrame.sort() ordered rows by index label, not by value;\n",
"# sort_index() is the explicit spelling of that and still exists in current pandas.\n",
"Sorted = bigdiamonds.sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" <th>newdata</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.25</td>\n",
" <td>V.Good</td>\n",
" <td>K</td>\n",
" <td>I1</td>\n",
" <td>59</td>\n",
" <td>63.7</td>\n",
" <td>GIA</td>\n",
" <td>3.96 x 3.95 x 2.52</td>\n",
" <td>NaN</td>\n",
" <td>3.96</td>\n",
" <td>3.95</td>\n",
" <td>2.52</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.23</td>\n",
" <td>Good</td>\n",
" <td>G</td>\n",
" <td>I1</td>\n",
" <td>61</td>\n",
" <td>58.1</td>\n",
" <td>GIA</td>\n",
" <td>4.00 x 4.05 x 2.30</td>\n",
" <td>NaN</td>\n",
" <td>4.00</td>\n",
" <td>4.05</td>\n",
" <td>2.30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.34</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>I2</td>\n",
" <td>58</td>\n",
" <td>58.7</td>\n",
" <td>GIA</td>\n",
" <td>4.56 x 4.53 x 2.67</td>\n",
" <td>NaN</td>\n",
" <td>4.56</td>\n",
" <td>4.53</td>\n",
" <td>2.67</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.21</td>\n",
" <td>V.Good</td>\n",
" <td>D</td>\n",
" <td>I1</td>\n",
" <td>60</td>\n",
" <td>60.6</td>\n",
" <td>GIA</td>\n",
" <td>3.80 x 3.82 x 2.31</td>\n",
" <td>NaN</td>\n",
" <td>3.80</td>\n",
" <td>3.82</td>\n",
" <td>2.31</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" <td>K</td>\n",
" <td>I1</td>\n",
" <td>59</td>\n",
" <td>62.2</td>\n",
" <td>EGL</td>\n",
" <td>4.35 x 4.26 x 2.68</td>\n",
" <td>NaN</td>\n",
" <td>4.35</td>\n",
" <td>4.26</td>\n",
" <td>2.68</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.20</td>\n",
" <td>Good</td>\n",
" <td>G</td>\n",
" <td>SI2</td>\n",
" <td>60</td>\n",
" <td>64.4</td>\n",
" <td>GIA</td>\n",
" <td>3.74 x 3.67 x 2.38</td>\n",
" <td>NaN</td>\n",
" <td>3.74</td>\n",
" <td>3.67</td>\n",
" <td>2.38</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity table depth cert measurements price \\\n",
"0 0.25 V.Good K I1 59 63.7 GIA 3.96 x 3.95 x 2.52 NaN \n",
"1 0.23 Good G I1 61 58.1 GIA 4.00 x 4.05 x 2.30 NaN \n",
"2 0.34 Good J I2 58 58.7 GIA 4.56 x 4.53 x 2.67 NaN \n",
"3 0.21 V.Good D I1 60 60.6 GIA 3.80 x 3.82 x 2.31 NaN \n",
"4 0.31 V.Good K I1 59 62.2 EGL 4.35 x 4.26 x 2.68 NaN \n",
"5 0.20 Good G SI2 60 64.4 GIA 3.74 x 3.67 x 2.38 NaN \n",
"\n",
" x y z newdata \n",
"0 3.96 3.95 2.52 NaN \n",
"1 4.00 4.05 2.30 NaN \n",
"2 4.56 4.53 2.67 NaN \n",
"3 3.80 3.82 2.31 NaN \n",
"4 4.35 4.26 2.68 NaN \n",
"5 3.74 3.67 2.38 NaN "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First six rows of the sorted frame.\n",
"Sorted.head(n=6)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Order rows by ascending price. sort_values() (pandas >= 0.17) replaces the removed DataFrame.sort();\n",
"# rows whose price is missing are placed at the end by default.\n",
"Sorted = bigdiamonds.sort_values(by=\"price\", ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>carat</th>\n",
" <th>cut</th>\n",
" <th>color</th>\n",
" <th>clarity</th>\n",
" <th>table</th>\n",
" <th>depth</th>\n",
" <th>cert</th>\n",
" <th>measurements</th>\n",
" <th>price</th>\n",
" <th>x</th>\n",
" <th>y</th>\n",
" <th>z</th>\n",
" <th>newdata</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>493</th>\n",
" <td>0.24</td>\n",
" <td>V.Good</td>\n",
" <td>G</td>\n",
" <td>SI1</td>\n",
" <td>61.0</td>\n",
" <td>58.9</td>\n",
" <td>GIA</td>\n",
" <td>4.09 x 4.10 x 2.41</td>\n",
" <td>300</td>\n",
" <td>4.09</td>\n",
" <td>4.10</td>\n",
" <td>2.41</td>\n",
" <td>1250.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>494</th>\n",
" <td>0.31</td>\n",
" <td>V.Good</td>\n",
" <td>K</td>\n",
" <td>SI2</td>\n",
" <td>59.0</td>\n",
" <td>60.2</td>\n",
" <td>GIA</td>\n",
" <td>4.40 x 4.42 x 2.65</td>\n",
" <td>300</td>\n",
" <td>4.40</td>\n",
" <td>4.42</td>\n",
" <td>2.65</td>\n",
" <td>967.741935</td>\n",
" </tr>\n",
" <tr>\n",
" <th>495</th>\n",
" <td>0.26</td>\n",
" <td>Good</td>\n",
" <td>J</td>\n",
" <td>VS2</td>\n",
" <td>56.5</td>\n",
" <td>64.1</td>\n",
" <td>IGI</td>\n",
" <td>4.01 x 4.05 x 2.58</td>\n",
" <td>300</td>\n",
" <td>4.01</td>\n",
" <td>4.05</td>\n",
" <td>2.58</td>\n",
" <td>1153.846154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>496</th>\n",
" <td>0.24</td>\n",
" <td>Ideal</td>\n",
" <td>G</td>\n",
" <td>SI1</td>\n",
" <td>55.0</td>\n",
" <td>61.3</td>\n",
" <td>GIA</td>\n",
" <td>4.01 x 4.03 x 2.47</td>\n",
" <td>300</td>\n",
" <td>4.01</td>\n",
" <td>4.03</td>\n",
" <td>2.47</td>\n",
" <td>1250.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>0.30</td>\n",
" <td>Good</td>\n",
" <td>H</td>\n",
" <td>I1</td>\n",
" <td>57.0</td>\n",
" <td>62.2</td>\n",
" <td>GIA</td>\n",
" <td>4.21 x 4.24 x 2.63</td>\n",
" <td>300</td>\n",
" <td>4.21</td>\n",
" <td>4.24</td>\n",
" <td>2.63</td>\n",
" <td>1000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>498</th>\n",
" <td>0.34</td>\n",
" <td>Good</td>\n",
" <td>F</td>\n",
" <td>I1</td>\n",
" <td>66.0</td>\n",
" <td>55.0</td>\n",
" <td>GIA</td>\n",
" <td>4.75 x 4.61 x 2.57</td>\n",
" <td>300</td>\n",
" <td>4.75</td>\n",
" <td>4.61</td>\n",
" <td>2.57</td>\n",
" <td>882.352941</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" carat cut color clarity table depth cert measurements \\\n",
"493 0.24 V.Good G SI1 61.0 58.9 GIA 4.09 x 4.10 x 2.41 \n",
"494 0.31 V.Good K SI2 59.0 60.2 GIA 4.40 x 4.42 x 2.65 \n",
"495 0.26 Good J VS2 56.5 64.1 IGI 4.01 x 4.05 x 2.58 \n",
"496 0.24 Ideal G SI1 55.0 61.3 GIA 4.01 x 4.03 x 2.47 \n",
"497 0.30 Good H I1 57.0 62.2 GIA 4.21 x 4.24 x 2.63 \n",
"498 0.34 Good F I1 66.0 55.0 GIA 4.75 x 4.61 x 2.57 \n",
"\n",
" price x y z newdata \n",
"493 300 4.09 4.10 2.41 1250.000000 \n",
"494 300 4.40 4.42 2.65 967.741935 \n",
"495 300 4.01 4.05 2.58 1153.846154 \n",
"496 300 4.01 4.03 2.47 1250.000000 \n",
"497 300 4.21 4.24 2.63 1000.000000 \n",
"498 300 4.75 4.61 2.57 882.352941 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First six rows of the sorted frame.\n",
"Sorted.head(n=6)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import urllib\n",
"url = \"http://goo.gl/j0Rvxq\""
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "IOError",
"evalue": "[Errno socket error] [Errno 110] Connection timed out",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mIOError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-41-c16c23d3fa36>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mraw_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0murllib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0murlopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36murlopen\u001b[1;34m(url, data, proxies, context)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[0mopener\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_urlopener\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 87\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 88\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 89\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mopener\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mopen\u001b[1;34m(self, fullurl, data)\u001b[0m\n\u001b[0;32m 211\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 212\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 213\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 214\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 215\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mopen_http\u001b[1;34m(self, url, data)\u001b[0m\n\u001b[0;32m 362\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 363\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 364\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhttp_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 365\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 366\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhttp_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mhttp_error\u001b[1;34m(self, url, fp, errcode, errmsg, headers, data)\u001b[0m\n\u001b[0;32m 375\u001b[0m \u001b[0mmethod\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 376\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 377\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 378\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 379\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mhttp_error_301\u001b[1;34m(self, url, fp, errcode, errmsg, headers, data)\u001b[0m\n\u001b[0;32m 669\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mhttp_error_301\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 670\u001b[0m \u001b[1;34m\"\"\"Error 301 -- also relocated (permanently).\"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 671\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhttp_error_302\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 672\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 673\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mhttp_error_303\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mhttp_error_302\u001b[1;34m(self, url, fp, errcode, errmsg, headers, data)\u001b[0m\n\u001b[0;32m 639\u001b[0m \"Internal Server Error: Redirect Recursion\", headers)\n\u001b[0;32m 640\u001b[0m result = self.redirect_internal(url, fp, errcode, errmsg, headers,\n\u001b[1;32m--> 641\u001b[1;33m data)\n\u001b[0m\u001b[0;32m 642\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtries\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 643\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mredirect_internal\u001b[1;34m(self, url, fp, errcode, errmsg, headers, data)\u001b[0m\n\u001b[0;32m 665\u001b[0m headers)\n\u001b[0;32m 666\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 667\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnewurl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 668\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 669\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mhttp_error_301\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mopen\u001b[1;34m(self, fullurl, data)\u001b[0m\n\u001b[0;32m 211\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 212\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 213\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 214\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 215\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/urllib.pyc\u001b[0m in \u001b[0;36mopen_http\u001b[1;34m(self, url, data)\u001b[0m\n\u001b[0;32m 348\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mrealhost\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mputheader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Host'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrealhost\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 349\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0margs\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maddheaders\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mputheader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 350\u001b[1;33m \u001b[0mh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 351\u001b[0m \u001b[0merrcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrmsg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetreply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 352\u001b[0m \u001b[0mfp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetfile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/httplib.pyc\u001b[0m in \u001b[0;36mendheaders\u001b[1;34m(self, message_body)\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1048\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1049\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1050\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1051\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/httplib.pyc\u001b[0m in \u001b[0;36m_send_output\u001b[1;34m(self, message_body)\u001b[0m\n\u001b[0;32m 891\u001b[0m \u001b[0mmsg\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mmessage_body\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 892\u001b[0m \u001b[0mmessage_body\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 893\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 894\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmessage_body\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 895\u001b[0m \u001b[1;31m#message_body was not a string (i.e. it is a file) and\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/httplib.pyc\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, data)\u001b[0m\n\u001b[0;32m 853\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msock\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 854\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mauto_open\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 855\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 856\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 857\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mNotConnected\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/httplib.pyc\u001b[0m in \u001b[0;36mconnect\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 830\u001b[0m \u001b[1;34m\"\"\"Connect to the host and port specified in __init__.\"\"\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 831\u001b[0m self.sock = self._create_connection((self.host,self.port),\n\u001b[1;32m--> 832\u001b[1;33m self.timeout, self.source_address)\n\u001b[0m\u001b[0;32m 833\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 834\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_tunnel_host\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/farheen/anaconda/lib/python2.7/socket.pyc\u001b[0m in \u001b[0;36mcreate_connection\u001b[1;34m(address, timeout, source_address)\u001b[0m\n\u001b[0;32m 573\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 574\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merr\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 575\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 576\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 577\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"getaddrinfo returns an empty list\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mIOError\u001b[0m: [Errno socket error] [Errno 110] Connection timed out"
]
}
],
"source": [
"raw_data = urllib.urlopen(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dataset = np.loadtxt(raw_data,delimiter = \",\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dataset\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dataset2 = pd.DataFrame(dataset[0:,0:])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dataset2.head(6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dataset2 = pd.DataFrame(dataset[0:,3:])\n",
"dataset2.head(6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dataset3 = np.array(dataset2)  # to pass it as input to scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rows = np.random.choice(bigdiamonds.index.values, int(0.01 * len(bigdiamonds)))  # 1% sample; size must be an int, and `b` is not assigned until a later cell"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"rows"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"type(dataset)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(type(bigdiamonds))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"b = len(bigdiamonds)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"b"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"type(bigdiamonds.index.values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay = bigdiamonds.groupby(\"cut\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay.median()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay.max().newdata"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay.describe().carat"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pd.crosstab?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay2 = pd.crosstab(bigdiamonds.color,bigdiamonds.cut)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ajay2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import matplotlib as mt\n"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pylab as pl"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pl.plot(bigdiamonds.carat,bigdiamonds.price)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pl.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from ggplot import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\n",
"ggplot(bigdiamonds,aes(y='carat',x='price')) + geom_point()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rows = np.random.choice(bigdiamonds.index.values, int(.0001 * b))  # size must be an int, not a float\n",
"smalldata = bigdiamonds.loc[rows]  # `rows` holds index labels, so select by label"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ggplot(sample_df,aes(x='price',y='carat')) + geom_point() + geom_abline(intercept=1000,slope=.3,size=1,color='green')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ggplot(bigdiamonds,aes(x='price',y='carat',ymin=0,ymax=4)) + geom_area() + geom_point(color='coral')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"max(sample_df.carat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"min(sample_df.carat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pl.scatter(bigdiamonds.price,bigdiamonds.carat)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"p = ggplot(aes(x='price',y='carat'), data=clean)  # NOTE(review): `clean` is not defined in the nearby cells; confirm it exists before this cell runs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cleaneddata =bigdiamonds.dropna().reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"p = ggplot(aes(x='price',y='carat',color='cut'), data = cleaneddata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sns.jointplot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"sns.kdeplot(bigdiamonds[\"price\"],shade=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pandas.stats.api import ols"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"res = ols(y=bigdiamonds[\"price\"], x=bigdiamonds[\"carat\"])  # NOTE(review): ols needs a regressor `x`; carat is assumed here, confirm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lastdiamonds.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"max(bigdiamonds.groupby('color').count().price)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment