AIMS LTMP UVC Analysis
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Ancient vs. modern LTMP comparison\n",
"\n",
"To evaluate any potential bias between original LTMP methods and new, length-based, methods."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import python packages\n",
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pymc3 as pm\n",
"from pymc3.backends import SQLite\n",
"import theano as th\n",
"import theano.tensor as tt\n",
"import seaborn as sns\n",
"import matplotlib as mp\n",
"import pdb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Helper functions\n",
"def indexall(L):\n",
" poo = []\n",
" for p in L:\n",
" if not p in poo:\n",
" poo.append(p)\n",
" Ix = np.array([poo.index(p) for p in L])\n",
" return poo,Ix\n",
"\n",
"def subindexall(short,long):\n",
" poo = []\n",
" out = []\n",
" for s,l in zip(short,long):\n",
" if not l in poo:\n",
" poo.append(l)\n",
" out.append(s)\n",
" return indexall(out)\n",
"\n",
"def firstindex(L):\n",
" ulist = unique(L)\n",
" return np.array([poo.index(p) for p in ulist])\n",
"\n",
"match = lambda a, b: np.array([ b.index(x) if x in b else None for x in a ])"
]
},
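{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick illustration (toy labels, not part of the analysis) of what the indexing helpers return:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical example: indexall returns the unique labels and an integer index into them;\n",
"# subindexall collapses a coarse factor (e.g. Site) to one entry per unique fine factor (e.g. Site_Transect)\n",
"toy_sites = ['A', 'A', 'B', 'B']\n",
"toy_sitetrans = ['A_1', 'A_2', 'B_1', 'B_2']\n",
"print(indexall(toy_sitetrans))                # (['A_1', 'A_2', 'B_1', 'B_2'], array([0, 1, 2, 3]))\n",
"print(subindexall(toy_sites, toy_sitetrans))  # (['A', 'B'], array([0, 0, 1, 1]))"
]
},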
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Sample_ID', 'Site', 'Method', 'Direction', 'Transect', 'Observer',\n",
" 'Time', 'ACA_BLOC', 'ACA_DUSS', 'ACA_LINE', 'ACA_NCUS', 'ACA_TRIO',\n",
" 'BOL_MURI', 'CEP_ARGU', 'CET_BICO', 'CHA_AFAS', 'CHA_AURI',\n",
" 'CHA_BARO', 'CHA_FLAV', 'CHA_LINE', 'CHA_PLEB', 'CHA_RAFF',\n",
" 'CHA_RAIN', 'CHA_SPEC', 'CHA_TTUS', 'CHA_ULIE', 'CHA_VAGA',\n",
" 'CHE_FASC', 'CHE_UNDU', 'CHM_ROST', 'CHO_FASC', 'CHS_BLEE',\n",
" 'CHS_JAPA', 'CHS_MICR', 'CHS_SORD', 'COR_GAIM', 'CTE_GROP',\n",
" 'EPB_INSI', 'EPI_FASC', 'GOM_VARI', 'HAL_HORT', 'HEM_FASC',\n",
" 'HEM_MELT', 'HIP_LONG', 'LET_MINI', 'LET_NEBU', 'LET_OLIV',\n",
" 'LUT_FLMA', 'MCR_GROP', 'MON_GRAN', 'NAS_TUBE', 'NAS_UNIC',\n",
" 'PMS_LAEV', 'PMS_LEOP', 'SCA_ALTI', 'SCA_CHAM', 'SCA_FLAV',\n",
" 'SCA_FORS', 'SCA_FREN', 'SCA_GHOB', 'SCA_GLOB', 'SCA_NIGR',\n",
" 'SCA_OVIC', 'SCA_PSIT', 'SCA_RIVU', 'SCA_RUBR', 'SCA_SCHL',\n",
" 'SCA_SPIN', 'SIG_ARGE', 'SIG_CORA', 'SIG_PMUS', 'SIG_PTUS',\n",
" 'SIG_PUEL', 'SIG_VULP', 'ZAN_CORN', 'ZEB_SCOP', 'ZEB_VELI',\n",
" 'Acanthuridae', 'Chaetodontidae', 'Labridae', 'Lethrinidae',\n",
" 'Lutjanidae', 'Scaridae', 'Serranidae', 'Siganidae'], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Import data\n",
"xdata = pd.read_csv('uvc.csv')\n",
"xdata.columns.values"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(110, 85)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xdata.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sample_ID</th>\n",
" <th>Site</th>\n",
" <th>Method</th>\n",
" <th>Direction</th>\n",
" <th>Transect</th>\n",
" <th>Observer</th>\n",
" <th>Time</th>\n",
" <th>ACA_BLOC</th>\n",
" <th>ACA_DUSS</th>\n",
" <th>ACA_LINE</th>\n",
" <th>...</th>\n",
" <th>ZEB_SCOP</th>\n",
" <th>ZEB_VELI</th>\n",
" <th>Acanthuridae</th>\n",
" <th>Chaetodontidae</th>\n",
" <th>Labridae</th>\n",
" <th>Lethrinidae</th>\n",
" <th>Lutjanidae</th>\n",
" <th>Scaridae</th>\n",
" <th>Serranidae</th>\n",
" <th>Siganidae</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>1</td>\n",
" <td>AC</td>\n",
" <td>1356</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>3</td>\n",
" <td>AC</td>\n",
" <td>1404</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>22</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>5</td>\n",
" <td>AC</td>\n",
" <td>1415</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>7</td>\n",
" <td>AC</td>\n",
" <td>1424</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Length</td>\n",
" <td>Out</td>\n",
" <td>9</td>\n",
" <td>AC</td>\n",
" <td>1435</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>2</td>\n",
" <td>IM</td>\n",
" <td>1352</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>4</td>\n",
" <td>IM</td>\n",
" <td>1402</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>6</td>\n",
" <td>IM</td>\n",
" <td>1412</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>18</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>8</td>\n",
" <td>IM</td>\n",
" <td>1422</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>NS102</td>\n",
" <td>1</td>\n",
" <td>Count</td>\n",
" <td>Out</td>\n",
" <td>10</td>\n",
" <td>IM</td>\n",
" <td>1432</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 85 columns</p>\n",
"</div>"
],
"text/plain": [
" Sample_ID Site Method Direction Transect Observer Time ACA_BLOC \\\n",
"10 NS102 1 Length Out 1 AC 1356 0 \n",
"11 NS102 1 Length Out 3 AC 1404 0 \n",
"12 NS102 1 Length Out 5 AC 1415 0 \n",
"13 NS102 1 Length Out 7 AC 1424 0 \n",
"14 NS102 1 Length Out 9 AC 1435 0 \n",
"15 NS102 1 Count Out 2 IM 1352 0 \n",
"16 NS102 1 Count Out 4 IM 1402 0 \n",
"17 NS102 1 Count Out 6 IM 1412 0 \n",
"18 NS102 1 Count Out 8 IM 1422 0 \n",
"19 NS102 1 Count Out 10 IM 1432 0 \n",
"\n",
" ACA_DUSS ACA_LINE ... ZEB_SCOP ZEB_VELI Acanthuridae \\\n",
"10 0 0 ... 0 0 2 \n",
"11 0 0 ... 0 0 5 \n",
"12 0 0 ... 0 0 2 \n",
"13 0 0 ... 0 0 2 \n",
"14 0 0 ... 1 0 1 \n",
"15 0 0 ... 0 0 1 \n",
"16 0 0 ... 0 0 1 \n",
"17 0 0 ... 1 0 3 \n",
"18 0 0 ... 0 0 0 \n",
"19 0 0 ... 0 0 3 \n",
"\n",
" Chaetodontidae Labridae Lethrinidae Lutjanidae Scaridae Serranidae \\\n",
"10 2 2 0 0 9 0 \n",
"11 4 4 0 0 22 0 \n",
"12 2 2 1 0 14 3 \n",
"13 5 2 0 0 42 0 \n",
"14 7 4 0 0 16 0 \n",
"15 3 3 0 0 8 1 \n",
"16 3 1 0 0 13 1 \n",
"17 0 1 0 0 18 1 \n",
"18 4 0 0 1 29 0 \n",
"19 0 1 0 0 13 0 \n",
"\n",
" Siganidae \n",
"10 3 \n",
"11 5 \n",
"12 2 \n",
"13 2 \n",
"14 4 \n",
"15 3 \n",
"16 2 \n",
"17 4 \n",
"18 2 \n",
"19 4 \n",
"\n",
"[10 rows x 85 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove NS101\n",
"xdata = xdata[xdata.Sample_ID != 'NS101']\n",
"xdata.iloc[:10]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create sample site variable\n",
"xdata['SampSite'] = xdata.Sample_ID.astype(str)+'_'+xdata.Site.astype(str)\n",
"\n",
"# Calculate transect-level richness\n",
"xdata['Richness'] = (xdata.ix[:,'ACA_BLOC':'ZEB_VELI']>1).sum(1).values\n",
"\n",
"# Calculate transect-level abundance\n",
"xdata['Abundance'] = xdata.ix[:,'ACA_BLOC':'ZEB_VELI'].sum(1).values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate average transect duration for properly-recorded transects"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 48, 3, 8, 4, 9, 3, 8, 4, 8, -3, -7, -4, -7,\n",
" -5, -8, -3, -47, -3])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List to store intervals\n",
"intvl = []\n",
"# Grab dataset with just AC and ME\n",
"tmp = xdata[xdata['Observer'].isin(['AC','ME'])].copy()\n",
"# Calculate the average interval between subsequent transects at each site\n",
"for s in tmp.Sample_ID.unique():\n",
" tmp2 = tmp.loc[tmp['Sample_ID']==s]\n",
" # Outbound\n",
" tmp3 = tmp2.loc[tmp2['Direction']=='Out']\n",
" for i in range(1,11):\n",
" intvl += list(tmp3.Time.loc[tmp3.Transect==i+1].values-tmp3.Time.loc[tmp3.Transect==i].values)\n",
" # Return leg\n",
" tmp4 = tmp2.loc[tmp2['Direction']=='Back']\n",
" for i in range(1,11):\n",
" intvl += list(tmp4.Time.loc[tmp4.Transect==i].values-tmp4.Time.loc[tmp4.Transect==i-1].values)\n",
"intvl = np.array(intvl)\n",
"intvl"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.4375"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Average duration of transect\n",
"mu_intvl = np.mean(abs(np.array(intvl))[abs(np.array(intvl))<15])\n",
"mu_intvl"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fix IM times\n",
"Estimate the time differential between IM and the other clocks"
]
},
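{
"cell_type": "markdown",
"metadata": {},
"source": [
"The idea, as implemented in the next few cells: for every pair of back-to-back transects where one time came from IM's watch and the other from AC's or ME's, the gap should be roughly the average transect duration $\\bar{t} \\approx 5.4$ min estimated above, so any excess, roughly $|\\Delta t - \\bar{t}|$, is attributed to IM's watch (ignoring gaps of 15 minutes or more, as above). For example, an observed gap of about 11 minutes between adjacent transects would imply an offset of about 6 minutes."
]
},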
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# List to store intervals\n",
"bintvl = []\n",
"# Record if IM is first or second observation\n",
"iIM = []"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Grab dataset with just IM and ME\n",
"tmp = xdata[xdata['Observer'].isin(['IM','ME'])].copy()\n",
"# Calculate the average interval between subsequent transects at each site\n",
"for s in tmp.Sample_ID.unique():\n",
" tmp2 = tmp.loc[tmp['Sample_ID']==s]\n",
" # Outbound\n",
" tmp3 = tmp2.loc[tmp2['Direction']=='Out']\n",
" for i in range(1,11):\n",
" out = list(tmp3.Time.loc[tmp3.Transect==i+1].values-tmp3.Time.loc[tmp3.Transect==i].values)\n",
" bintvl += out\n",
" # flag for empty additions of out\n",
" if len(out)==1:\n",
" iIM += list(tmp3.Observer.loc[tmp3.Transect==i+1].values=='IM')\n",
" # Return leg\n",
" tmp4 = tmp2.loc[tmp2['Direction']=='Back']\n",
" for i in range(1,11):\n",
" out2 = list(tmp4.Time.loc[tmp4.Transect==i].values-tmp4.Time.loc[tmp4.Transect==i-1].values)\n",
" bintvl += out2\n",
" # flag for empty additions of out2\n",
" if len(out2)==1:\n",
" iIM += list(tmp4.Observer.loc[tmp4.Transect==i].values=='IM')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Grab dataset with just IM and AC\n",
"tmp = xdata[xdata['Observer'].isin(['IM','AC'])]\n",
"# Calculate the average interval between subsequent transects at each site\n",
"for s in tmp.Sample_ID.unique():\n",
" tmp2 = tmp.loc[tmp['Sample_ID']==s]\n",
" # Outbound\n",
" tmp3 = tmp2.loc[tmp2['Direction']=='Out']\n",
" for i in range(1,11):\n",
" out = list(tmp3.Time.loc[tmp3.Transect==i+1].values-tmp3.Time.loc[tmp3.Transect==i].values)\n",
" bintvl += out\n",
" # flag for empty additions of out\n",
" if len(out)==1:\n",
" iIM += list(tmp3.Observer.loc[tmp3.Transect==i+1].values=='IM')\n",
" # Return leg\n",
" tmp4 = tmp2.loc[tmp2['Direction']=='Back']\n",
" for i in range(1,11):\n",
" out2 = list(tmp4.Time.loc[tmp4.Transect==i].values-tmp4.Time.loc[tmp4.Transect==i-1].values)\n",
" bintvl += out2\n",
" # flag for empty additions of out2\n",
" if len(out2)==1:\n",
" iIM += list(tmp4.Observer.loc[tmp4.Transect==i].values=='IM')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Calculate average interval duration\n",
"bintvl = np.array(bintvl)\n",
"iIM = np.array(iIM)*1\n",
"iIM = (iIM+-1+iIM*1)*-1"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.8558673469387754"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calculate level of offset for IM's watch\n",
"oset = abs((mu_intvl+bintvl*iIM)*(iIM==-1)+(bintvl-mu_intvl*iIM)*(iIM==1))\n",
"IM_off = oset[oset<15].mean()\n",
"IM_off"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Create new corrected times\n",
"tmp = xdata.Time.values\n",
"tmp[xdata.Observer.values=='IM'] = tmp[xdata.Observer.values=='IM'] - round(IM_off).astype(int)\n",
"xdata['TimeAdj'] = tmp"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data\n",
"\n",
"Next we need to calculate the interval times for the return back"
]
},
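{
"cell_type": "markdown",
"metadata": {},
"source": [
"As implemented below, a 'Back' transect's elapsed time is measured from when its outbound counterpart would have finished, $\\mathrm{RTime}_i = \\mathrm{Time}_i - (\\mathrm{Time}_{\\mathrm{Out}} + \\bar{t})$, where $\\bar{t}$ is the mean transect duration estimated above; 'Out' transects (and 'Back' transects with no matching 'Out' record) are left at zero."
]
},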
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Site-transect combination\n",
"xdata['SiteTrans'] = xdata.Site.astype(str)+'_'+xdata.Transect.astype(str)\n",
"# Site-transect direction combination\n",
"xdata['SiteTransDir'] = xdata.Site.astype(str)+'_'+xdata.Transect.astype(str)+'_'+xdata.Direction"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Empty array to hold return times\n",
"nobs = len(xdata.Time.values)\n",
"RTime = np.zeros(nobs)\n",
"\n",
"for i in range(nobs):\n",
" if xdata.Direction.iloc[i]=='Back':\n",
" try:\n",
" # Get out time plus typical time to do a transect\n",
" poo = xdata.Time.loc[xdata.SiteTransDir==xdata.SiteTrans.iloc[i]+'_'+'Out'].values[0]+mu_intvl\n",
" except IndexError:\n",
" # Start with back\n",
" poo = xdata.Time.iloc[i]\n",
" RTime[i] = xdata.Time.iloc[i]-poo"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 62.5625, 82.5625,\n",
" 103.5625, 125.5625, 186.5625, 71.5625, 93.5625, 113.5625,\n",
" 135.5625, 193.5625, 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 75.5625, 98.5625, 120.5625, 143.5625, 245.5625, 47.5625,\n",
" 70.5625, 91.5625, 115.5625, 136.5625, 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 36.5625, 97.5625, 160.5625, 189.5625,\n",
" 211.5625, 32.5625, 94.5625, 164.5625, 186.5625, 208.5625,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 18.5625, 78.5625,\n",
" 100.5625, 161.5625, 182.5625, 66.5625, 87.5625, 108.5625,\n",
" 169.5625, 189.5625, 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. , 0. ,\n",
" 53.5625, 74.5625, 96.5625, 123.5625, 186.5625, 69.5625,\n",
" 90.5625, 111.5625, 133.5625, 202.5625])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Time in per 100 min\n",
"xdata['RTime'] = RTime\n",
"xdata['RTime'].values"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([1, 2, 3, 4, 5]),\n",
" array(['1_1', '1_3', '1_5', '1_7', '1_9', '1_2', '1_4', '1_6', '1_8',\n",
" '1_10', '2_2', '2_4', '2_6', '2_8', '2_10', '2_1', '2_3', '2_5',\n",
" '2_7', '2_9', '3_2', '3_4', '3_6', '3_8', '3_10', '3_1', '3_3',\n",
" '3_5', '3_7', '3_9', '4_1', '4_3', '4_5', '4_7', '4_9', '4_2',\n",
" '4_4', '4_6', '4_8', '4_10', '5_1', '5_3', '5_5', '5_7', '5_9',\n",
" '5_2', '5_4', '5_6', '5_8', '5_10'], dtype=object),\n",
" array(['Length', 'Count'], dtype=object),\n",
" array(['Out', 'Back'], dtype=object),\n",
" array(['AC', 'IM', 'ME'], dtype=object))"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xdata.Site.unique(), xdata.SiteTrans.unique(), xdata.Method.unique(), xdata.Direction.unique(), xdata.Observer.unique()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Set up sampling design - transects within sites\n",
"Site,Is = subindexall(xdata.Site.values, xdata.SiteTrans.values)\n",
"nsite = len(Site)\n",
"\n",
"# Index transect\n",
"Trans,It = indexall(xdata.SiteTrans.values)\n",
"ntrans = len(Trans)\n",
"\n",
"# Method\n",
"Method,Im = indexall(xdata.Method.values)\n",
"\n",
"# Direction\n",
"Direction,Id = indexall(xdata.Direction.values)\n",
"iout = np.array([i for i, x in enumerate(xdata.Direction.values) if x=='Out'])\n",
"iback = np.array([i for i, x in enumerate(xdata.Direction.values) if x=='Back'])\n",
"\n",
"# Observer\n",
"Observer,Io = indexall(xdata.Observer.values)\n",
"npeeps = len(Observer)\n",
"\n",
"# Diver time\n",
"RTime = xdata.RTime.values"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(['Length', 'Count'], ['Out', 'Back'], ['AC', 'IM', 'ME'])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Method, Direction, Observer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set up data loop"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"Var_names = ['Acanthuridae', 'Chaetodontidae', 'Labridae', 'Lutjanidae', 'Scaridae', 'Serranidae', 'Siganidae','Richness','Abundance']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bayesian hierarhical model"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Shared data trick from https://github.com/pymc-devs/pymc3/issues/1825\n",
"shared_data = th.shared(xdata[Var_names[0]].values, borrow=True)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"Model = pm.Model()\n",
"\n",
"with Model:\n",
" # Global estimate\n",
" γ0 = pm.Normal('Global_intercept', mu=0, tau=0.001)\n",
" # Errors among Sites\n",
" σ0 = pm.Uniform('Site_SD', lower=0., upper=100)\n",
" τ0 = σ0**-2\n",
" \n",
" # Site-level estimates\n",
" κ0 = pm.Normal('Site_intercept', mu=γ0, tau=τ0, shape=nsite)\n",
" \n",
" # Errors within Sites\n",
" σ1 = pm.Uniform('Trans_SD', lower=0., upper=100)\n",
" τ1 = σ1**-2\n",
" \n",
" # Transect-level intercepts\n",
" β0 = pm.Normal('Transect_intercept', mu=κ0[Is], tau=τ1, shape=ntrans)\n",
" βt = pm.Normal('Direction_'+Direction[1], mu=0.0, tau=0.001)\n",
" \n",
" # Transect-level covariates\n",
" β1 = pm.Normal('Method_'+Method[1], mu=0.0, tau=0.001)\n",
" β2 = pm.Normal('Time_lag', mu=0.0, tau=0.001)\n",
" b3 = pm.Normal('Observer', mu=0.0, tau=0.001, shape=npeeps-1)\n",
" β3 = tt.set_subtensor(tt.zeros(shape=npeeps)[1:], b3)\n",
" \n",
" # NB rate\n",
" λ = pm.Deterministic('lambda_out', tt.exp(β0[It]+βt*Id+β1*Im+β2*RTime+β3[Io]))\n",
" \n",
" # NB dispersion\n",
" α = pm.Gamma('alpha', alpha=0.001, beta=0.001)\n",
" \n",
" # Likelihoods\n",
" Yi = pm.NegativeBinomial('Yi', mu=λ, alpha=α, observed=shared_data)\n",
" "
]
},
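{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the cell above specifies a negative-binomial mixed model, written here with the same symbols as the code:\n",
"\n",
"$$y_i \\sim \\mathrm{NB}(\\lambda_i, \\alpha), \\qquad \\log\\lambda_i = \\beta_{0,t[i]} + \\beta_t D_i + \\beta_1 M_i + \\beta_2\\,\\mathrm{RTime}_i + \\beta_{3,o[i]}$$\n",
"\n",
"$$\\beta_{0,t} \\sim N(\\kappa_{0,s[t]}, \\sigma_1^2), \\qquad \\kappa_{0,s} \\sim N(\\gamma_0, \\sigma_0^2)$$\n",
"\n",
"where $D_i$ indicates a 'Back' transect, $M_i$ indicates the 'Count' method, and the observer effect $\\beta_3$ is fixed at zero for the first observer (AC)."
]
},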
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 20000/20000 [00:33<00:00, 594.81it/s]\n",
"100%|██████████| 20000/20000 [00:32<00:00, 608.20it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 590.27it/s]\n",
"100%|██████████| 20000/20000 [00:32<00:00, 612.69it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 594.33it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 593.66it/s]\n",
"100%|██████████| 20000/20000 [00:33<00:00, 598.19it/s]\n",
"100%|██████████| 20000/20000 [00:31<00:00, 777.27it/s]\n",
"100%|██████████| 20000/20000 [00:32<00:00, 612.83it/s]\n"
]
},
{
"data": {
"text/plain": [
"'\\n# Sampling\\nwith Model:\\n step = pm.Metropolis()\\n # Draw samples\\n trace = pm.sample(20000, step=step, progressbar=True)\\n#'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"#\"\"\"\n",
"# Iterate over datasets\n",
"for var in Var_names:\n",
" # Update new response variable\n",
" shared_data.set_value(xdata[var].values)\n",
" # Sampling\n",
" with Model:\n",
" step = pm.Metropolis()\n",
" # Draw samples\n",
" pm.sample(20000, progressbar=True, step=step, trace=SQLite(var+'_trace.db'))\n",
"#\"\"\"\n",
"\n",
"\"\"\"\n",
"# Sampling\n",
"with Model:\n",
" step = pm.Metropolis()\n",
" # Draw samples\n",
" trace = pm.sample(20000, step=step, progressbar=True)\n",
"#\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('Method_Count', 'Direction_Back')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'Method_'+Method[1], 'Direction_'+Direction[1]"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"Next import those traces and plot..."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"#pm.traceplot(trace);"
]
},
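{
"cell_type": "markdown",
"metadata": {},
"source": [
"A possible way to read the saved traces back in (a sketch, assuming the SQLite backend's `load` function available in this PyMC3 version, and that the `*_trace.db` files sit alongside the notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: load each response variable's saved trace and plot the method, direction and observer effects\n",
"from pymc3.backends.sqlite import load\n",
"\n",
"traces = {}\n",
"with Model:\n",
"    for var in Var_names:\n",
"        traces[var] = load(var+'_trace.db')\n",
"\n",
"for var in Var_names:\n",
"    pm.traceplot(traces[var], varnames=['Method_'+Method[1], 'Direction_'+Direction[1], 'Observer'])"
]
},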
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}