AIMS LTMP UVC Analysis
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Ancient vs. modern LTMP comparison\n",
"\n",
"To evaluate any potential bias between original LTMP methods and new, length-based, methods."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import python packages\n",
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pymc3 as pm\n",
"from pymc3.backends import SQLite\n",
"import theano as th\n",
"import theano.tensor as tt\n",
"import seaborn as sns\n",
"import matplotlib as mp\n",
"import pdb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Helper functions\n",
"def indexall(L):\n",
" poo = []\n",
" for p in L:\n",
" if not p in poo:\n",
" poo.append(p)\n",
" Ix = np.array([poo.index(p) for p in L])\n",
" return poo,Ix\n",
"\n",
"def subindexall(short,long):\n",
" poo = []\n",
" out = []\n",
" for s,l in zip(short,long):\n",
" if not l in poo:\n",
" poo.append(l)\n",
" out.append(s)\n",
" return indexall(out)\n",
"\n",
"def firstindex(L):\n",
" ulist = unique(L)\n",
" return np.array([poo.index(p) for p in ulist])\n",
"\n",
"match = lambda a, b: np.array([ b.index(x) if x in b else None for x in a ])"
]
},
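{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustration (toy labels, not part of the analysis) of what the indexing helpers return:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical example: indexall returns the unique labels and an integer index into them;\n",
"# subindexall collapses a coarse factor (e.g. Site) to one entry per unique fine factor (e.g. Site_Transect)\n",
"toy_sites = ['A', 'A', 'B', 'B']\n",
"toy_sitetrans = ['A_1', 'A_2', 'B_1', 'B_2']\n",
"print(indexall(toy_sitetrans))                # (['A_1', 'A_2', 'B_1', 'B_2'], array([0, 1, 2, 3]))\n",
"print(subindexall(toy_sites, toy_sitetrans))  # (['A', 'B'], array([0, 0, 1, 1]))"
]
},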
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Sample_ID', 'Site', 'Method', 'Direction', 'Transect', 'Observer',\n",
" 'Time', 'ACA_BLOC', 'ACA_DUSS', 'ACA_LINE', 'ACA_NCUS', 'ACA_TRIO',\n",
" 'BOL_MURI', 'CEP_ARGU', 'CET_BICO', 'CHA_AFAS', 'CHA_AURI',\n",
" 'CHA_BARO', 'CHA_FLAV', 'CHA_LINE', 'CHA_PLEB', 'CHA_RAFF',\n",
" 'CHA_RAIN', 'CHA_SPEC', 'CHA_TTUS', 'CHA_ULIE', 'CHA_VAGA',\n",
" 'CHE_FASC', 'CHE_UNDU', 'CHM_ROST', 'CHO_FASC', 'CHS_BLEE',\n",
" 'CHS_JAPA', 'CHS_MICR', 'CHS_SORD', 'COR_GAIM', 'CTE_GROP',\n",
" 'EPB_INSI', 'EPI_FASC', 'GOM_VARI', 'HAL_HORT', 'HEM_FASC',\n",
" 'HEM_MELT', 'HIP_LONG', 'LET_MINI', 'LET_NEBU', 'LET_OLIV',\n",
" 'LUT_FLMA', 'MCR_GROP', 'MON_GRAN', 'NAS_TUBE', 'NAS_UNIC',\n",
" 'PMS_LAEV', 'PMS_LEOP', 'SCA_ALTI', 'SCA_CHAM', 'SCA_FLAV',\n",
" 'SCA_FORS', 'SCA_FREN', 'SCA_GHOB', 'SCA_GLOB', 'SCA_NIGR',\n",
" 'SCA_OVIC', 'SCA_PSIT', 'SCA_RIVU', 'SCA_RUBR', 'SCA_SCHL',\n",
" 'SCA_SPIN', 'SIG_ARGE', 'SIG_CORA', 'SIG_PMUS', 'SIG_PTUS',\n",
" 'SIG_PUEL', 'SIG_VULP', 'ZAN_CORN', 'ZEB_SCOP', 'ZEB_VELI',\n",
" 'Acanthuridae', 'Chaetodontidae', 'Labridae', 'Lethrinidae',\n",
" 'Lutjanidae', 'Scaridae', 'Serranidae', 'Siganidae'], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Import data\n",
"xdata = pd.read_csv('uvc.csv')\n",
"xdata.columns.values"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(110, 85)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xdata.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample_ID</th>\n",
" <th>Site</th>\n",
" <th>Method</th>\n",
" <th>Direction</th>\n",
" <th>Transect</th>\n",
" <th>Observer</th>\n",
" <th>Time</th>\n",
" <th>ACA_BLOC</th>\n",
" <th>ACA_DUSS</th>\n",
" <th>ACA_LINE</th>\n",
" <th>...</th>\n",
" <th>ZEB_SCOP</th>\n",
" <th>ZEB_VELI</th>\n",
" <th>Acanthuridae</th>\n",
" <th>Chaetodontidae</th>\n",
" <th>Labridae</th>\n",
" <th>Lethrinidae</th>\n",
" <th>Lutjanidae</th>\n",
" <th>Scaridae</th>\n",
" <th>Serranidae</th>\n",
" <th>Siganidae</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>1</td>\n",
" <td>AC</td>\n",
" <td>1356</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>3</td>\n",
" <td>AC</td>\n",
" <td>1404</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>5</td>\n",
" <td>AC</td>\n",
" <td>1415</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>7</td>\n",
" <td>AC</td>\n",
" <td>1424</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>9</td>\n",
" <td>AC</td>\n",
" <td>1435</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>2</td>\n",
" <td>IM</td>\n",
" <td>1352</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>4</td>\n",
" <td>IM</td>\n",
" <td>1402</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>6</td>\n",
" <td>IM</td>\n",
" <td>1412</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>18</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>8</td>\n",
" <td>IM</td>\n",
" <td>1422</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>10</td>\n",
" <td>IM</td>\n",
" <td>1432</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 85 columns</p>\n",
"</div>"
],
"text/plain": [
" Sample_ID Site Method Direction Transect Observer Time ACA_BLOC \\\n",
"10 NS102 1 Length Out 1 AC 1356 0 \n",
"11 NS102 1 Length Out 3 AC 1404 0 \n",
"12 NS102 1 Length Out 5 AC 1415 0 \n",
"13 NS102 1 Length Out 7 AC 1424 0 \n",
"14 NS102 1 Length Out 9 AC 1435 0 \n",
"15 NS102 1 Count Out 2 IM 1352 0 \n",
"16 NS102 1 Count Out 4 IM 1402 0 \n",
"17 NS102 1 Count Out 6 IM 1412 0 \n",
"18 NS102 1 Count Out 8 IM 1422 0 \n",
"19 NS102 1 Count Out 10 IM 1432 0 \n",
"\n",
" ACA_DUSS ACA_LINE ... ZEB_SCOP ZEB_VELI Acanthuridae \\\n",
"10 0 0 ... 0 0 2 \n",
"11 0 0 ... 0 0 5 \n",
"12 0 0 ... 0 0 2 \n",
"13 0 0 ... 0 0 2 \n",
"14 0 0 ... 1 0 1 \n",
"15 0 0 ... 0 0 1 \n",
"16 0 0 ... 0 0 1 \n",
"17 0 0 ... 1 0 3 \n",
"18 0 0 ... 0 0 0 \n",
"19 0 0 ... 0 0 3 \n",
"\n",
" Chaetodontidae Labridae Lethrinidae Lutjanidae Scaridae Serranidae \\\n",
"10 2 2 0 0 9 0 \n",
"11 4 4 0 0 22 0 \n",
"12 2 2 1 0 14 3 \n",
"13 5 2 0 0 42 0 \n",
"14 7 4 0 0 16 0 \n",
"15 3 3 0 0 8 1 \n",
"16 3 1 0 0 13 1 \n",
"17 0 1 0 0 18 1 \n",
"18 4 0 0 1 29 0 \n",
"19 0 1 0 0 13 0 \n",
"\n",
" Siganidae \n",
"10 3 \n",
"11 5 \n",
"12 2 \n",
"13 2 \n",
"14 4 \n",
"15 3 \n",
"16 2 \n",
"17 4 \n",
"18 2 \n",
"19 4 \n",
"\n",
"[10 rows x 85 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove NS101\n",
"xdata = xdata[xdata.Sample_ID != 'NS101']\n",
"xdata.iloc[:10]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create sample site variable\n",
"xdata['SampSite'] = xdata.Sample_ID.astype(str)+'_'+xdata.Site.astype(str)\n",
"\n",
"# Calculate transect-level richness\n",
"xdata['Richness'] = (xdata.ix[:,'ACA_BLOC':'ZEB_VELI']>1).sum(1).values\n",
"\n",
"# Calculate transect-level abundance\n",
"xdata['Abundance'] = xdata.ix[:,'ACA_BLOC':'ZEB_VELI'].sum(1).values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate average transect duration for properly-recorded transects"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 48, 3, 8, 4, 9, 3, 8, 4, 8, -3, -7, -4, -7,\n",
" -5, -8, -3, -47, -3])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List to store intervals\n",
"intvl = []\n",
"# Grab dataset with just AC and ME\n",
"tmp = xdata[xdata['Observer'].isin(['AC','ME'])].copy()\n",
"# Calculate the average interval between subsequent transects at each site\n",
"for s in tmp.Sample_ID.unique():\n",
" tmp2 = tmp.loc[tmp['Sample_ID']==s]\n",
" # Outbound\n",
" tmp3 = tmp2.loc[tmp2['Direction']=='Out']\n",
" for i in range(1,11):\n",
" intvl += list(tmp3.Time.loc[tmp3.Transect==i+1].values-tmp3.Time.loc[tmp3.Transect==i].values)\n",
" # Return leg\n",
" tmp4 = tmp2.loc[tmp2['Direction']=='Back']\n",
" for i in range(1,11):\n",
" intvl += list(tmp4.Time.loc[tmp4.Transect==i].values-tmp4.Time.loc[tmp4.Transect==i-1].values)\n",
"intvl = np.array(intvl)\n",
"intvl"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.4375"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Average duration of transect\n",
"mu_intvl = np.mean(abs(np.array(intvl))[abs(np.array(intvl))<15])\n",
"mu_intvl"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fix IM times\n",
"Estimate the time differential between IM and the other clocks"
]
},
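{
"cell_type": "markdown",
"metadata": {},
"source": [
"The idea, as implemented in the next few cells: for every pair of back-to-back transects where one time came from IM's watch and the other from AC's or ME's, the gap should be roughly the average transect duration $\\bar{t} \\approx 5.4$ min estimated above, so any excess, roughly $|\\Delta t - \\bar{t}|$, is attributed to IM's watch (ignoring gaps of 15 minutes or more, as above). For example, an observed gap of about 11 minutes between adjacent transects would imply an offset of about 6 minutes."
]
},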
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# List to store intervals\n",
"bintvl = []\n",
"# Record if IM is first or second observation\n",
"iIM = []"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Grab dataset with just IM and ME\n",
"tmp = xdata[xdata['Observer'].isin(['IM','ME'])].copy()\n",
"# Calculate the average interval between subsequent transects at each site\n",
"for s in tmp.Sample_ID.unique():\n",
" tmp2 = tmp.loc[tmp['Sample_ID']==s]\n",
" # Outbound\n",
" tmp3 = tmp2.loc[tmp2['Direction']=='Out']\n",
" for i in range(1,11):\n",
" out = list(tmp3.Time.loc[tmp3.Transect==i+1].values-tmp3.Time.loc[tmp3.Transect==i].values)\n",
" bintvl += out\n",
" # flag for empty additions of out\n",
" if len(out)==1:\n",
" iIM += list(tmp3.Observer.loc[tmp3.Transect==i+1].values=='IM')\n",
" # Return leg\n",
" tmp4 = tmp2.loc[tmp2['Direction']=='Back']\n",
" for i in range(1,11):\n",
" out2 = list(tmp4.Time.loc[tmp4.Transect==i].values-tmp4.Time.loc[tmp4.Transect==i-1].values)\n",
" bintvl += out2\n",
" # flag for empty additions of out2\n",
" if len(out2)==1:\n",
" iIM += list(tmp4.Observer.loc[tmp4.Transect==i].values=='IM')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Grab dataset with just IM and AC\n",
"tmp = xdata[xdata['Observer'].isin(['IM','AC'])]\n",
"# Calculate the average interval between subsequent transects at each site\n",
"for s in tmp.Sample_ID.unique():\n",
" tmp2 = tmp.loc[tmp['Sample_ID']==s]\n",
" # Outbound\n",
" tmp3 = tmp2.loc[tmp2['Direction']=='Out']\n",
" for i in range(1,11):\n",
" out = list(tmp3.Time.loc[tmp3.Transect==i+1].values-tmp3.Time.loc[tmp3.Transect==i].values)\n",
" bintvl += out\n",
" # flag for empty additions of out\n",
" if len(out)==1:\n",
" iIM += list(tmp3.Observer.loc[tmp3.Transect==i+1].values=='IM')\n",
" # Return leg\n",
" tmp4 = tmp2.loc[tmp2['Direction']=='Back']\n",
" for i in range(1,11):\n",
" out2 = list(tmp4.Time.loc[tmp4.Transect==i].values-tmp4.Time.loc[tmp4.Transect==i-1].values)\n",
" bintvl += out2\n",
" # flag for empty additions of out2\n",
" if len(out2)==1:\n",
" iIM += list(tmp4.Observer.loc[tmp4.Transect==i].values=='IM')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Calculate average interval duration\n",
"bintvl = np.array(bintvl)\n",
"iIM = np.array(iIM)*1\n",
"iIM = (iIM+-1+iIM*1)*-1"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.8558673469387754"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calculate level of offset for IM's watch\n",
"oset = abs((mu_intvl+bintvl*iIM)*(iIM==-1)+(bintvl-mu_intvl*iIM)*(iIM==1))\n",
"IM_off = oset[oset<15].mean()\n",
"IM_off"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create new corrected times\n",
"tmp = xdata.Time.values\n",
"tmp[xdata.Observer.values=='IM'] = tmp[xdata.Observer.values=='IM'] - round(IM_off).astype(int)\n",
"xdata['TimeAdj'] = tmp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data\n",
"\n",
"Next we need to calculate the interval times for the return back"
]
},
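{
"cell_type": "markdown",
"metadata": {},
"source": [
"As implemented below, a 'Back' transect's elapsed time is measured from when its outbound counterpart would have finished, $\\mathrm{RTime}_i = \\mathrm{Time}_i - (\\mathrm{Time}_{\\mathrm{Out}} + \\bar{t})$, where $\\bar{t}$ is the mean transect duration estimated above; 'Out' transects (and 'Back' transects with no matching 'Out' record) are left at zero."
]
},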
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Site-transect combination\n",
"xdata['SiteTrans'] = xdata.Site.astype(str)+'_'+xdata.Transect.astype(str)\n",
"# Site-transect direction combination\n",
"xdata['SiteTransDir'] = xdata.Site.astype(str)+'_'+xdata.Transect.astype(str)+'_'+xdata.Direction"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Empty array to hold return times\n",
"nobs = len(xdata.Time.values)\n",
"RTime = np.zeros(nobs)\n",
"\n",
"for i in range(nobs):\n",
" if xdata.Direction.iloc[i]=='Back':\n",
" try:\n",
" # Get out time plus typical time to do a transect\n",
" poo = xdata.Time.loc[xdata.SiteTransDir==xdata.SiteTrans.iloc[i]+'_'+'Out'].values[0]+mu_intvl\n",
" except IndexError:\n",
" # Start with back\n",
" poo = xdata.Time.iloc[i]\n",
" RTime[i] = xdata.Time.iloc[i]-poo"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 62.5625, 82.5625,\n",
" 103.5625, 125.5625, 186.5625, 71.5625, 93.5625, 113.5625,\n",
" 135.5625, 193.5625, 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 75.5625, 98.5625, 120.5625, 143.5625, 245.5625, 47.5625,\n",
" 70.5625, 91.5625, 115.5625, 136.5625, 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 36.5625, 97.5625, 160.5625, 189.5625,\n",
" 211.5625, 32.5625, 94.5625, 164.5625, 186.5625, 208.5625,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 18.5625, 78.5625,\n",
" 100.5625, 161.5625, 182.5625, 66.5625, 87.5625, 108.5625,\n",
" 169.5625, 189.5625, 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 53.5625, 74.5625, 96.5625, 123.5625, 186.5625, 69.5625,\n",
" 90.5625, 111.5625, 133.5625, 202.5625])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Time in per 100 min\n",
"xdata['RTime'] = RTime\n",
"xdata['RTime'].values"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([1, 2, 3, 4, 5]),\n",
" array(['1_1', '1_3', '1_5', '1_7', '1_9', '1_2', '1_4', '1_6', '1_8',\n",
" '1_10', '2_2', '2_4', '2_6', '2_8', '2_10', '2_1', '2_3', '2_5',\n",
" '2_7', '2_9', '3_2', '3_4', '3_6', '3_8', '3_10', '3_1', '3_3',\n",
" '3_5', '3_7', '3_9', '4_1', '4_3', '4_5', '4_7', '4_9', '4_2',\n",
" '4_4', '4_6', '4_8', '4_10', '5_1', '5_3', '5_5', '5_7', '5_9',\n",
" '5_2', '5_4', '5_6', '5_8', '5_10'], dtype=object),\n",
" array(['Length', 'Count'], dtype=object),\n",
" array(['Out', 'Back'], dtype=object),\n",
" array(['AC', 'IM', 'ME'], dtype=object))"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xdata.Site.unique(), xdata.SiteTrans.unique(), xdata.Method.unique(), xdata.Direction.unique(), xdata.Observer.unique()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Set up sampling design - transects within sites\n",
"Site,Is = subindexall(xdata.Site.values, xdata.SiteTrans.values)\n",
"nsite = len(Site)\n",
"\n",
"# Index transect\n",
"Trans,It = indexall(xdata.SiteTrans.values)\n",
"ntrans = len(Trans)\n",
"\n",
"# Method\n",
"Method,Im = indexall(xdata.Method.values)\n",
"\n",
"# Direction\n",
"Direction,Id = indexall(xdata.Direction.values)\n",
"iout = np.array([i for i, x in enumerate(xdata.Direction.values) if x=='Out'])\n",
"iback = np.array([i for i, x in enumerate(xdata.Direction.values) if x=='Back'])\n",
"\n",
"# Observer\n",
"Observer,Io = indexall(xdata.Observer.values)\n",
"npeeps = len(Observer)\n",
"\n",
"# Diver time\n",
"RTime = xdata.RTime.values"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(['Length', 'Count'], ['Out', 'Back'], ['AC', 'IM', 'ME'])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Method, Direction, Observer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up data loop"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"Var_names = ['Acanthuridae', 'Chaetodontidae', 'Labridae', 'Lutjanidae', 'Scaridae', 'Serranidae', 'Siganidae','Richness','Abundance']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bayesian hierarhical model"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Shared data trick from https://github.com/pymc-devs/pymc3/issues/1825\n",
"shared_data = th.shared(xdata[Var_names[0]].values, borrow=True)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"Model = pm.Model()\n",
"\n",
"with Model:\n",
" # Global estimate\n",
" γ0 = pm.Normal('Global_intercept', mu=0, tau=0.001)\n",
" # Errors among Sites\n",
" σ0 = pm.Uniform('Site_SD', lower=0., upper=100)\n",
" τ0 = σ0**-2\n",
" \n",
" # Site-level estimates\n",
" κ0 = pm.Normal('Site_intercept', mu=γ0, tau=τ0, shape=nsite)\n",
" \n",
" # Errors within Sites\n",
" σ1 = pm.Uniform('Trans_SD', lower=0., upper=100)\n",
" τ1 = σ1**-2\n",
" \n",
" # Transect-level intercepts\n",
" β0 = pm.Normal('Transect_intercept', mu=κ0[Is], tau=τ1, shape=ntrans)\n",
" βt = pm.Normal('Direction_'+Direction[1], mu=0.0, tau=0.001)\n",
" \n",
" # Transect-level covariates\n",
" β1 = pm.Normal('Method_'+Method[1], mu=0.0, tau=0.001)\n",
" β2 = pm.Normal('Time_lag', mu=0.0, tau=0.001)\n",
" b3 = pm.Normal('Observer', mu=0.0, tau=0.001, shape=npeeps-1)\n",
" β3 = tt.set_subtensor(tt.zeros(shape=npeeps)[1:], b3)\n",
" \n",
" # NB rate\n",
" λ = pm.Deterministic('lambda_out', tt.exp(β0[It]+βt*Id+β1*Im+β2*RTime+β3[Io]))\n",
" \n",
" # NB dispersion\n",
" α = pm.Gamma('alpha', alpha=0.001, beta=0.001)\n",
" \n",
" # Likelihoods\n",
" Yi = pm.NegativeBinomial('Yi', mu=λ, alpha=α, observed=shared_data)\n",
" "
]
},
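{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the cell above specifies a negative-binomial mixed model, written here with the same symbols as the code:\n",
"\n",
"$$y_i \\sim \\mathrm{NB}(\\lambda_i, \\alpha), \\qquad \\log\\lambda_i = \\beta_{0,t[i]} + \\beta_t D_i + \\beta_1 M_i + \\beta_2\\,\\mathrm{RTime}_i + \\beta_{3,o[i]}$$\n",
"\n",
"$$\\beta_{0,t} \\sim N(\\kappa_{0,s[t]}, \\sigma_1^2), \\qquad \\kappa_{0,s} \\sim N(\\gamma_0, \\sigma_0^2)$$\n",
"\n",
"where $D_i$ indicates a 'Back' transect, $M_i$ indicates the 'Count' method, and the observer effect $\\beta_3$ is fixed at zero for the first observer (AC)."
]
},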
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 20000/20000 [00:33<00:00, 594.81it/s]\n",
"100%|██████████| 20000/20000 [00:32<00:00, 608.20it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 590.27it/s]\n",
"100%|██████████| 20000/20000 [00:32<00:00, 612.69it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 594.33it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 593.66it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 598.19it/s]\n",
"100%|██████████| 20000/20000 [00:31<00:00, 777.27it/s]\n",
"100%|██████████| 20000/20000 [00:32<00:00, 612.83it/s]\n"
]
},
{
"data": {
"text/plain": [
"'\\n# Sampling\\nwith Model:\\n step = pm.Metropolis()\\n # Draw samples\\n trace = pm.sample(20000, step=step, progressbar=True)\\n#'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"#\"\"\"\n",
"# Iterate over datasets\n",
"for var in Var_names:\n",
" # Update new response variable\n",
" shared_data.set_value(xdata[var].values)\n",
" # Sampling\n",
" with Model:\n",
" step = pm.Metropolis()\n",
" # Draw samples\n",
" pm.sample(20000, progressbar=True, step=step, trace=SQLite(var+'_trace.db'))\n",
"#\"\"\"\n",
"\n",
"\"\"\"\n",
"# Sampling\n",
"with Model:\n",
" step = pm.Metropolis()\n",
" # Draw samples\n",
" trace = pm.sample(20000, step=step, progressbar=True)\n",
"#\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('Method_Count', 'Direction_Back')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'Method_'+Method[1], 'Direction_'+Direction[1]"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Next import those traces and plot..."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"#pm.traceplot(trace);"
]
},
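{
"cell_type": "markdown",
"metadata": {},
"source": [
"A possible way to read the saved traces back in (a sketch, assuming the SQLite backend's `load` function available in this PyMC3 version, and that the `*_trace.db` files sit alongside the notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: load each response variable's saved trace and plot the method, direction and observer effects\n",
"from pymc3.backends.sqlite import load\n",
"\n",
"traces = {}\n",
"with Model:\n",
"    for var in Var_names:\n",
"        traces[var] = load(var+'_trace.db')\n",
"\n",
"for var in Var_names:\n",
"    pm.traceplot(traces[var], varnames=['Method_'+Method[1], 'Direction_'+Direction[1], 'Observer'])"
]
},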
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}