Created
November 23, 2013 16:48
-
-
Save epifanio/7616935 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Untitled7" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "import pandas as pd\nimport datetime as dt", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# example dataset", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "data=\"\"\"cruiseid year station month day date lat lon depth_w taxon count \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Centropages_typicus 75343 \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Gastropoda 0 \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Calanus_finmarchicus 2340 \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Pseudocalanus_spp. 53816 \nAA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Penilia_avirostris 0 \nAA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Temora_longicornis 86574 \nAA8704 1987 1 05 13 13-APY-87 35.85 -75.48 18 Centropages_hamatus 88446 \nAA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Echinodermata 0 \nAA8704 1987 1 06 13 13-JUN-87 35.85 -75.48 18 Appendicularia 0 \nAA8704 1987 1 06 13 13-JUN-87 35.85 -75.48 18 Paracalanus_parvus 0 \nAA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Gastropoda 0 \nAA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Acartia_spp. 5616 \nAA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Metridia_lucens 468 \nAA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Evadne_spp. 0 \nAA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Salpa 0 \nAA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Oithona_spp. 468 \n\"\"\"\ndatafile = open('data.txt','w')\ndatafile.write(data)\ndatafile.close()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "parse = lambda x: dt.datetime.strptime(x, '%d-%m-%Y')\ndf = pd.read_csv('data.txt',index_col=0, header=False, parse_dates={\"Datetime\" : [1,3,4]}, skipinitialspace=True, sep=' ', skiprows=0)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# query the df where taxon is equal to 'Calanus_finmarchicus' or 'Gastropoda'\ndf[(df.taxon == 'Calanus_finmarchicus') | (df.taxon == 'Gastropoda')]", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cruiseid</th>\n <th>station</th>\n <th>date</th>\n <th>lat</th>\n <th>lon</th>\n <th>depth_w</th>\n <th>taxon</th>\n <th>count</th>\n <th>Unnamed: 11</th>\n </tr>\n <tr>\n <th>Datetime</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>1987-04-13</strong></td>\n <td> AA8704</td>\n <td> 1</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Gastropoda</td>\n <td> 0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <td><strong>1987-04-13</strong></td>\n <td> AA8704</td>\n <td> 1</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Calanus_finmarchicus</td>\n <td> 2340</td>\n <td>NaN</td>\n </tr>\n <tr>\n <td><strong>1987-07-13</strong></td>\n <td> AA8704</td>\n <td> 1</td>\n <td> 13-JUL-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Gastropoda</td>\n <td> 0</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": " cruiseid station date lat lon depth_w taxon \\\nDatetime \n1987-04-13 AA8704 1 13-APR-87 35.85 -75.48 18 Gastropoda \n1987-04-13 AA8704 1 13-APR-87 35.85 -75.48 18 Calanus_finmarchicus \n1987-07-13 AA8704 1 13-JUL-87 35.85 -75.48 18 Gastropoda \n\n count Unnamed: 11 \nDatetime \n1987-04-13 0 NaN \n1987-04-13 2340 NaN \n1987-07-13 0 NaN " | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# how to query the dataframe :\n# WHERE dataframe.taxon LIKE 'Calanus_finmarchicus' OR dataframe.taxone LIKE 'Gastropoda' AND dataframe.month LIKE 4\n# ?", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# in munmpy i can do something like that :\n#\nimport numpy as np\ndata = np.genfromtxt('data.txt', dtype=[('cruiseid','S6'), ('year','i4'), ('station','i4'), ('month','i4'), ('day','i4'), ('date','S9'), ('lat','f8'), ('lon','f8'), ('depth_w','i8'), ('taxon','S60'), ('count','i8')], skip_header=1)\nselection = [np.where((data['taxon']=='Calanus_finmarchicus') | (data['taxon']=='Gastropoda') & ((data['month']==4) | (data['month']==3)))[0]]\ndata[selection]\n\n\n", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": "array([ ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Gastropoda', 0),\n ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Calanus_finmarchicus', 2340)], \n dtype=[('cruiseid', '|S6'), ('year', '<i4'), ('station', '<i4'), ('month', '<i4'), ('day', '<i4'), ('date', '|S9'), ('lat', '<f8'), ('lon', '<f8'), ('depth_w', '<i8'), ('taxon', '|S60'), ('count', '<i8')])" | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# how can i do the same in pandas ?", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "dataset = data[selection]\ndesired = pd.DataFrame(dataset)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "dataset", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": "array([ ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Gastropoda', 0),\n ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Calanus_finmarchicus', 2340)], \n dtype=[('cruiseid', '|S6'), ('year', '<i4'), ('station', '<i4'), ('month', '<i4'), ('day', '<i4'), ('date', '|S9'), ('lat', '<f8'), ('lon', '<f8'), ('depth_w', '<i8'), ('taxon', '|S60'), ('count', '<i8')])" | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "desired # but with index the Datetime", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cruiseid</th>\n <th>year</th>\n <th>station</th>\n <th>month</th>\n <th>day</th>\n <th>date</th>\n <th>lat</th>\n <th>lon</th>\n <th>depth_w</th>\n <th>taxon</th>\n <th>count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>0</strong></td>\n <td> AA8704</td>\n <td> 1987</td>\n <td> 1</td>\n <td> 4</td>\n <td> 13</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Gastropoda</td>\n <td> 0</td>\n </tr>\n <tr>\n <td><strong>1</strong></td>\n <td> AA8704</td>\n <td> 1987</td>\n <td> 1</td>\n <td> 4</td>\n <td> 13</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Calanus_finmarchicus</td>\n <td> 2340</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": " cruiseid year station month day date lat lon depth_w \\\n0 AA8704 1987 1 4 13 13-APR-87 35.85 -75.48 18 \n1 AA8704 1987 1 4 13 13-APR-87 35.85 -75.48 18 \n\n taxon count \n0 Gastropoda 0 \n1 Calanus_finmarchicus 2340 " | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment