Skip to content

Instantly share code, notes, and snippets.

@epifanio
Created November 23, 2013 16:48
Show Gist options
  • Save epifanio/7616935 to your computer and use it in GitHub Desktop.
Save epifanio/7616935 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "Untitled7"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "import pandas as pd\nimport datetime as dt",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "# example dataset",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "data=\"\"\"cruiseid year station month day date lat lon depth_w taxon count \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Centropages_typicus 75343 \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Gastropoda 0 \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Calanus_finmarchicus 2340 \nAA8704 1987 1 04 13 13-APR-87 35.85 -75.48 18 Pseudocalanus_spp. 53816 \nAA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Penilia_avirostris 0 \nAA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Temora_longicornis 86574 \nAA8704 1987 1 05 13 13-APY-87 35.85 -75.48 18 Centropages_hamatus 88446 \nAA8704 1987 1 05 13 13-MAY-87 35.85 -75.48 18 Echinodermata 0 \nAA8704 1987 1 06 13 13-JUN-87 35.85 -75.48 18 Appendicularia 0 \nAA8704 1987 1 06 13 13-JUN-87 35.85 -75.48 18 Paracalanus_parvus 0 \nAA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Gastropoda 0 \nAA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Acartia_spp. 5616 \nAA8704 1987 1 07 13 13-JUL-87 35.85 -75.48 18 Metridia_lucens 468 \nAA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Evadne_spp. 0 \nAA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Salpa 0 \nAA8704 1987 1 08 13 13-AUG-87 35.85 -75.48 18 Oithona_spp. 468 \n\"\"\"\ndatafile = open('data.txt','w')\ndatafile.write(data)\ndatafile.close()",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": "parse = lambda x: dt.datetime.strptime(x, '%d-%m-%Y')\ndf = pd.read_csv('data.txt',index_col=0, header=False, parse_dates={\"Datetime\" : [1,3,4]}, skipinitialspace=True, sep=' ', skiprows=0)",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": "# query the df where taxon is equal to 'Calanus_finmarchicus' or 'Gastropoda'\ndf[(df.taxon == 'Calanus_finmarchicus') | (df.taxon == 'Gastropoda')]",
"language": "python",
"metadata": {},
"outputs": [
{
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cruiseid</th>\n <th>station</th>\n <th>date</th>\n <th>lat</th>\n <th>lon</th>\n <th>depth_w</th>\n <th>taxon</th>\n <th>count</th>\n <th>Unnamed: 11</th>\n </tr>\n <tr>\n <th>Datetime</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>1987-04-13</strong></td>\n <td> AA8704</td>\n <td> 1</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Gastropoda</td>\n <td> 0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <td><strong>1987-04-13</strong></td>\n <td> AA8704</td>\n <td> 1</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Calanus_finmarchicus</td>\n <td> 2340</td>\n <td>NaN</td>\n </tr>\n <tr>\n <td><strong>1987-07-13</strong></td>\n <td> AA8704</td>\n <td> 1</td>\n <td> 13-JUL-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Gastropoda</td>\n <td> 0</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>",
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": " cruiseid station date lat lon depth_w taxon \\\nDatetime \n1987-04-13 AA8704 1 13-APR-87 35.85 -75.48 18 Gastropoda \n1987-04-13 AA8704 1 13-APR-87 35.85 -75.48 18 Calanus_finmarchicus \n1987-07-13 AA8704 1 13-JUL-87 35.85 -75.48 18 Gastropoda \n\n count Unnamed: 11 \nDatetime \n1987-04-13 0 NaN \n1987-04-13 2340 NaN \n1987-07-13 0 NaN "
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": "# How can the dataframe be queried with the equivalent of:\n# WHERE dataframe.taxon LIKE 'Calanus_finmarchicus' OR dataframe.taxon LIKE 'Gastropoda' AND dataframe.month LIKE 4\n# ?",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "# in munmpy i can do something like that :\n#\nimport numpy as np\ndata = np.genfromtxt('data.txt', dtype=[('cruiseid','S6'), ('year','i4'), ('station','i4'), ('month','i4'), ('day','i4'), ('date','S9'), ('lat','f8'), ('lon','f8'), ('depth_w','i8'), ('taxon','S60'), ('count','i8')], skip_header=1)\nselection = [np.where((data['taxon']=='Calanus_finmarchicus') | (data['taxon']=='Gastropoda') & ((data['month']==4) | (data['month']==3)))[0]]\ndata[selection]\n\n\n",
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": "array([ ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Gastropoda', 0),\n ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Calanus_finmarchicus', 2340)], \n dtype=[('cruiseid', '|S6'), ('year', '<i4'), ('station', '<i4'), ('month', '<i4'), ('day', '<i4'), ('date', '|S9'), ('lat', '<f8'), ('lon', '<f8'), ('depth_w', '<i8'), ('taxon', '|S60'), ('count', '<i8')])"
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": "# How can I do the same in pandas?",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": "dataset = data[selection]\ndesired = pd.DataFrame(dataset)",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "dataset",
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": "array([ ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Gastropoda', 0),\n ('AA8704', 1987, 1, 4, 13, '13-APR-87', 35.85, -75.48, 18, 'Calanus_finmarchicus', 2340)], \n dtype=[('cruiseid', '|S6'), ('year', '<i4'), ('station', '<i4'), ('month', '<i4'), ('day', '<i4'), ('date', '|S9'), ('lat', '<f8'), ('lon', '<f8'), ('depth_w', '<i8'), ('taxon', '|S60'), ('count', '<i8')])"
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": "desired  # the wanted result, except that the index should be the Datetime",
"language": "python",
"metadata": {},
"outputs": [
{
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cruiseid</th>\n <th>year</th>\n <th>station</th>\n <th>month</th>\n <th>day</th>\n <th>date</th>\n <th>lat</th>\n <th>lon</th>\n <th>depth_w</th>\n <th>taxon</th>\n <th>count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td><strong>0</strong></td>\n <td> AA8704</td>\n <td> 1987</td>\n <td> 1</td>\n <td> 4</td>\n <td> 13</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Gastropoda</td>\n <td> 0</td>\n </tr>\n <tr>\n <td><strong>1</strong></td>\n <td> AA8704</td>\n <td> 1987</td>\n <td> 1</td>\n <td> 4</td>\n <td> 13</td>\n <td> 13-APR-87</td>\n <td> 35.85</td>\n <td>-75.48</td>\n <td> 18</td>\n <td> Calanus_finmarchicus</td>\n <td> 2340</td>\n </tr>\n </tbody>\n</table>\n</div>",
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": " cruiseid year station month day date lat lon depth_w \\\n0 AA8704 1987 1 4 13 13-APR-87 35.85 -75.48 18 \n1 AA8704 1987 1 4 13 13-APR-87 35.85 -75.48 18 \n\n taxon count \n0 Gastropoda 0 \n1 Calanus_finmarchicus 2340 "
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment