lesolorzanov/mergiwild.ipynb

## mergiwild.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import json\n",
    "import IPython.display\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "images\n",
      "info\n",
      "annotations\n",
      "categories\n",
      "images\n",
      "info\n",
      "annotations\n",
      "categories\n"
     ]
    }
   ],
   "source": [
    "f=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/iWildCam_2019_iNat_Idaho/iWildCam_2019_iNat_Idaho.json\"\n",
    "with open(f) as json_file:  \n",
    "    idahodata = json.load(json_file)\n",
    "    for p in idahodata:\n",
    "        print(p)\n",
    "        \n",
    "f=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/iWildCam_2019_CCT.json\"\n",
    "with open(f) as json_file:  \n",
    "    cctdata = json.load(json_file)\n",
    "    for p in cctdata:\n",
    "        print(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25263\n",
      "25263\n",
      "23\n",
      "196157\n",
      "196157\n",
      "23\n"
     ]
    }
   ],
   "source": [
    "idaho_images_df=pd.DataFrame(idahodata[\"images\"])\n",
    "idaho_annotations_df=pd.DataFrame(idahodata[\"annotations\"])\n",
    "idaho_categories_df=pd.DataFrame(idahodata[\"categories\"])\n",
    "\n",
    "cct_images_df=pd.DataFrame(cctdata[\"images\"])\n",
    "cct_annotations_df=pd.DataFrame(cctdata[\"annotations\"])\n",
    "cct_categories_df=pd.DataFrame(cctdata[\"categories\"])\n",
    "\n",
    "print(len(idaho_images_df))\n",
    "print(len(idaho_annotations_df))\n",
    "print(len(idaho_categories_df))\n",
    "print(len(cct_images_df))\n",
    "print(len(cct_annotations_df))\n",
    "print(len(cct_categories_df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-idaho_images_df\n",
      "['file_name', 'height', 'id', 'inat_year', 'license', 'rights_holder', 'width']\n",
      "\n",
      "\n",
      "-idaho_annotations_df\n",
      "['category_id', 'id', 'image_id', 'inat_species']\n",
      "\n",
      "\n",
      "-idaho_categories_df\n",
      "['id', 'inat_species_list', 'name']\n",
      "\n",
      "\n",
      "-cct_images_df\n",
      "['date_captured', 'file_name', 'frame_num', 'height', 'id', 'location', 'rights_holder', 'seq_id', 'seq_num_frames', 'width']\n",
      "\n",
      "\n",
      "-cct_annotations_df\n",
      "['category_id', 'id', 'image_id']\n",
      "\n",
      "\n",
      "-cct_categories_df\n",
      "['id', 'name']\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(\"-idaho_images_df\")\n",
    "s= [str(x) for x in list(idaho_images_df)] \n",
    "print(s);print(\"\\n\")\n",
    "\n",
    "print(\"-idaho_annotations_df\")\n",
    "s= [str(x) for x in list(idaho_annotations_df)] \n",
    "print(s);print(\"\\n\")\n",
    " \n",
    "print(\"-idaho_categories_df\")\n",
    "s= [str(x) for x in list(idaho_categories_df)] \n",
    "print(s);print(\"\\n\")\n",
    "\n",
    "print(\"-cct_images_df\")\n",
    "s= [str(x) for x in list(cct_images_df)] \n",
    "print(s);print(\"\\n\")\n",
    " \n",
    "print(\"-cct_annotations_df\")\n",
    "s= [str(x) for x in list(cct_annotations_df)] \n",
    "print(s);print(\"\\n\")\n",
    "\n",
    "print(\"-cct_categories_df\")\n",
    "s= [str(x) for x in list(cct_categories_df)] \n",
    "print(s);print(\"\\n\")\n",
    "\n",
    "keepids=['file_name', 'height', 'id', 'width']\n",
    " "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Idaho dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "idahocols=idaho_images_df.columns\n",
    "for c in idahocols:\n",
    "    if c not in keepids:\n",
    "        idaho_images_df=idaho_images_df.drop(c,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n",
       "      <td>532</td>\n",
       "      <td>1</td>\n",
       "      <td>800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n",
       "      <td>533</td>\n",
       "      <td>2</td>\n",
       "      <td>800</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           file_name  height  image_id  width\n",
       "0  train_val2017/Mammalia/Marmota flaviventris/6e...     532         1    800\n",
       "1  train_val2017/Mammalia/Marmota flaviventris/dc...     533         2    800"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idaho_images_df.rename(columns={\"id\":\"image_id\"},inplace=True)\n",
    "idaho_images_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>image_id</th>\n",
       "      <th>inat_species</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Marmota flaviventris</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Marmota flaviventris</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   category_id  id  image_id          inat_species\n",
       "0            3   1         1  Marmota flaviventris\n",
       "1            3   2         2  Marmota flaviventris"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idaho_annotations_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n",
       "      <td>532</td>\n",
       "      <td>1</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n",
       "      <td>533</td>\n",
       "      <td>2</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           file_name  height  image_id  width  \\\n",
       "0  train_val2017/Mammalia/Marmota flaviventris/6e...     532         1    800   \n",
       "1  train_val2017/Mammalia/Marmota flaviventris/dc...     533         2    800   \n",
       "\n",
       "   category_id  id  \n",
       "0            3   1  \n",
       "1            3   2  "
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idaho_merged=pd.merge(idaho_images_df,idaho_annotations_df,on=\"image_id\")\n",
    "idaho_merged=idaho_merged.drop(\"inat_species\",axis=1)\n",
    "idaho_merged.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>dataset</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n",
       "      <td>532</td>\n",
       "      <td>1</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>idaho</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n",
       "      <td>533</td>\n",
       "      <td>2</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>idaho</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           file_name  height  image_id  width  \\\n",
       "0  train_val2017/Mammalia/Marmota flaviventris/6e...     532         1    800   \n",
       "1  train_val2017/Mammalia/Marmota flaviventris/dc...     533         2    800   \n",
       "\n",
       "   category_id  id dataset  \n",
       "0            3   1   idaho  \n",
       "1            3   2   idaho  "
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idaho_merged.insert(6, \"dataset\", \"idaho\")\n",
    "idaho_merged.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>dataset</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n",
       "      <td>532</td>\n",
       "      <td>1</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>idaho</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n",
       "      <td>533</td>\n",
       "      <td>2</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>idaho</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           file_name  height  image_id  width  \\\n",
       "0  train_val2017/Mammalia/Marmota flaviventris/6e...     532         1    800   \n",
       "1  train_val2017/Mammalia/Marmota flaviventris/dc...     533         2    800   \n",
       "\n",
       "   category_id  id dataset  \n",
       "0            3   1   idaho  \n",
       "1            3   2   idaho  "
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(idaho_merged)\n",
    "idaho_merged.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CCT dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cctcols=cct_images_df.columns\n",
    "for c in cctcols:\n",
    "    if c not in keepids:\n",
    "        print(c)\n",
    "        cct_images_df=cct_images_df.drop(c,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1494</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>2048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1494</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>2048</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  file_name  height  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg    1494   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg    1494   \n",
       "\n",
       "                               image_id  width  \n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b   2048  \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b   2048  "
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cct_images_df.rename(columns={\"id\":\"image_id\"},inplace=True)\n",
    "cct_images_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>image_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "      <td>2a545480-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19</td>\n",
       "      <td>2a5455b6-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   category_id                                    id  \\\n",
       "0           19  2a545480-cbf1-11e8-819c-970a9450cdbc   \n",
       "1           19  2a5455b6-cbf1-11e8-819c-970a9450cdbc   \n",
       "\n",
       "                               image_id  \n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b  \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b  "
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cct_annotations_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cct_merged=pd.merge(cct_images_df,cct_annotations_df,on=\"image_id\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false,
    "hide_input": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1494</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>2048</td>\n",
       "      <td>19</td>\n",
       "      <td>2a545480-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1494</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>2048</td>\n",
       "      <td>19</td>\n",
       "      <td>2a5455b6-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  file_name  height  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg    1494   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg    1494   \n",
       "\n",
       "                               image_id  width  category_id  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b   2048           19   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b   2048           19   \n",
       "\n",
       "                                     id  \n",
       "0  2a545480-cbf1-11e8-819c-970a9450cdbc  \n",
       "1  2a5455b6-cbf1-11e8-819c-970a9450cdbc  "
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cct_merged.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cct_merged.insert(6, \"dataset\", \"cct\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>dataset</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1494</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>2048</td>\n",
       "      <td>19</td>\n",
       "      <td>2a545480-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>cct</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1494</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>2048</td>\n",
       "      <td>19</td>\n",
       "      <td>2a5455b6-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>cct</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  file_name  height  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg    1494   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg    1494   \n",
       "\n",
       "                               image_id  width  category_id  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b   2048           19   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b   2048           19   \n",
       "\n",
       "                                     id dataset  \n",
       "0  2a545480-cbf1-11e8-819c-970a9450cdbc     cct  \n",
       "1  2a5455b6-cbf1-11e8-819c-970a9450cdbc     cct  "
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cct_merged.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Append them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>dataset</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n",
       "      <td>532</td>\n",
       "      <td>1</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>idaho</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n",
       "      <td>533</td>\n",
       "      <td>2</td>\n",
       "      <td>800</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>idaho</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           file_name  height image_id  width  \\\n",
       "0  train_val2017/Mammalia/Marmota flaviventris/6e...     532        1    800   \n",
       "1  train_val2017/Mammalia/Marmota flaviventris/dc...     533        2    800   \n",
       "\n",
       "   category_id id dataset  \n",
       "0            3  1   idaho  \n",
       "1            3  2   idaho  "
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "extra_df=idaho_merged.append(cct_merged, ignore_index=True)\n",
    "extra_df.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get values corresponding to complementary classes\n",
    " This is from the idaho dataset only!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "idaho_needed=[2,5,6,7,9,12,15,20,21,22] #in idaho labels are still numbers\n",
    "#idaho_needed=[\"2\",\"5\",\"6\",\"7\",\"9\",\"12\",\"15\",\"20\",\"21\",\"22\"]\n",
    "try_anno_from_these=idaho_merged.loc[idaho_merged[\"category_id\"].isin(idaho_needed)]\n",
    "try_anno_from_these.to_csv(\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/idaho_necessary.csv\",index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## So, bad news, this dataset is messy AF.\n",
    "\n",
    "CCT bboxes are actually the bounding boxes of what they call iWild. CCT without bboxes is what they call CCT big. and CCT small has no json or csv. Basically CCT small is useless.\n",
    "\n",
    "bboxes are of the type: [x, y, width, height]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "images\n",
      "info\n",
      "annotations\n",
      "categories\n"
     ]
    }
   ],
   "source": [
    "f=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/iWildCam_2019_CCT_Bboxes.json\"\n",
    "with open(f) as json_file:  \n",
    "    cctbboxdata = json.load(json_file)\n",
    "    for p in cctbboxdata:\n",
    "        print(p)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-cctbbox_images_df\n",
      "['date_captured', 'file_name', 'frame_num', 'height', 'id', 'location', 'rights_holder', 'seq_id', 'seq_num_frames', 'width']\n",
      "\n",
      "\n",
      "-cctbbox_annotations_df\n",
      "['bbox', 'category_id', 'id', 'image_id']\n",
      "\n",
      "\n",
      "-cctbbox_categories_df\n",
      "['id', 'name']\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "cctbbox_images_df=pd.DataFrame(cctbboxdata[\"images\"])\n",
    "cctbbox_annotations_df=pd.DataFrame(cctbboxdata[\"annotations\"])\n",
    "cctbbox_categories_df=pd.DataFrame(cctbboxdata[\"categories\"])\n",
    "\n",
    "print(\"-cctbbox_images_df\")\n",
    "s= [str(x) for x in list(cctbbox_images_df)] \n",
    "print(s);print(\"\\n\")\n",
    "\n",
    "print(\"-cctbbox_annotations_df\")\n",
    "s= [str(x) for x in list(cctbbox_annotations_df)] \n",
    "print(s);print(\"\\n\")\n",
    " \n",
    "print(\"-cctbbox_categories_df\")\n",
    "s= [str(x) for x in list(cctbbox_categories_df)] \n",
    "print(s);print(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date_captured</th>\n",
       "      <th>file_name</th>\n",
       "      <th>frame_num</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>location</th>\n",
       "      <th>rights_holder</th>\n",
       "      <th>seq_id</th>\n",
       "      <th>seq_num_frames</th>\n",
       "      <th>width</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2011-05-13 23:43:18</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1494</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>33</td>\n",
       "      <td>Justin Brown</td>\n",
       "      <td>6f084ccc-5567-11e8-bc84-dca9047ef277</td>\n",
       "      <td>3</td>\n",
       "      <td>2048</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date_captured                                 file_name  frame_num  \\\n",
       "0  2011-05-13 23:43:18  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg          1   \n",
       "\n",
       "   height                              image_id  location rights_holder  \\\n",
       "0    1494  5998cfa4-23d2-11e8-a6a3-ec086b02610b        33  Justin Brown   \n",
       "\n",
       "                                 seq_id  seq_num_frames  width  \n",
       "0  6f084ccc-5567-11e8-bc84-dca9047ef277               3   2048  "
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cctbbox_images_df=cctbbox_images_df.rename(columns={\"id\":\"image_id\"})\n",
    "cctbbox_images_df.head(1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bbox</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>image_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[499.2, 711.68, 353.28, 199.68]</td>\n",
       "      <td>19</td>\n",
       "      <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[985.6, 783.36, 368.64, 230.4]</td>\n",
       "      <td>19</td>\n",
       "      <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                              bbox  category_id  \\\n",
       "0  [499.2, 711.68, 353.28, 199.68]           19   \n",
       "1   [985.6, 783.36, 368.64, 230.4]           19   \n",
       "\n",
       "                                     id                              image_id  \n",
       "0  2a545520-cbf1-11e8-819c-970a9450cdbc  5998cfa4-23d2-11e8-a6a3-ec086b02610b  \n",
       "1  2a54562e-cbf1-11e8-819c-970a9450cdbc  588a679f-23d2-11e8-a6a3-ec086b02610b  "
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cctbbox_annotations_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": false,
    "hide_input": true
   },
   "outputs": [],
   "source": [
    "#this was just to check that all images wuith bboxes are actually real files in the iwild folder\n",
    "#ps: yes, they are\n",
    "# arr=cctbbox_images_df[\"file_name\"].values\n",
    "# location=\"/media/leslie/CBAEXT4/iwildcam_2019/train_images/\"\n",
    "# existsIniWild=[]\n",
    "# doesnotexistiniWild=[]\n",
    "# for im in arr:\n",
    "#     exists = os.path.isfile(location+str(im))\n",
    "#     if(exists):\n",
    "#         existsIniWild.append(im)\n",
    "#     else:\n",
    "#         doesnotexistiniWild.append(im)\n",
    "\n",
    "# print(len(existsIniWild))\n",
    "# print(len(doesnotexistiniWild))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(cctbbox_annotations_df.iloc[0][\"bbox\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#convert the bbox list into different columns\n",
    "def bboxtocols(row,num):\n",
    "    bboxlist=list(row[\"bbox\"])\n",
    "    return int(bboxlist[num]//2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bbox</th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>image_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>w</th>\n",
       "      <th>h</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[499.2, 711.68, 353.28, 199.68]</td>\n",
       "      <td>19</td>\n",
       "      <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>249</td>\n",
       "      <td>355</td>\n",
       "      <td>176</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[985.6, 783.36, 368.64, 230.4]</td>\n",
       "      <td>19</td>\n",
       "      <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>492</td>\n",
       "      <td>391</td>\n",
       "      <td>184</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                              bbox  category_id  \\\n",
       "0  [499.2, 711.68, 353.28, 199.68]           19   \n",
       "1   [985.6, 783.36, 368.64, 230.4]           19   \n",
       "\n",
       "                                     id                              image_id  \\\n",
       "0  2a545520-cbf1-11e8-819c-970a9450cdbc  5998cfa4-23d2-11e8-a6a3-ec086b02610b   \n",
       "1  2a54562e-cbf1-11e8-819c-970a9450cdbc  588a679f-23d2-11e8-a6a3-ec086b02610b   \n",
       "\n",
       "     x    y    w    h  \n",
       "0  249  355  176   99  \n",
       "1  492  391  184  115  "
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#convert the bbox list into different columns\n",
    "cctbbox_annotations_df[\"x\"]=cctbbox_annotations_df.apply(lambda row: bboxtocols(row,0), axis=1)\n",
    "cctbbox_annotations_df[\"y\"]=cctbbox_annotations_df.apply(lambda row: bboxtocols(row,1), axis=1)\n",
    "cctbbox_annotations_df[\"w\"]=cctbbox_annotations_df.apply(lambda row: bboxtocols(row,2), axis=1)\n",
    "cctbbox_annotations_df[\"h\"]=cctbbox_annotations_df.apply(lambda row: bboxtocols(row,3), axis=1)\n",
    "cctbbox_annotations_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category_id</th>\n",
       "      <th>id</th>\n",
       "      <th>image_id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>w</th>\n",
       "      <th>h</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "      <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>249</td>\n",
       "      <td>355</td>\n",
       "      <td>176</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19</td>\n",
       "      <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>492</td>\n",
       "      <td>391</td>\n",
       "      <td>184</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   category_id                                    id  \\\n",
       "0           19  2a545520-cbf1-11e8-819c-970a9450cdbc   \n",
       "1           19  2a54562e-cbf1-11e8-819c-970a9450cdbc   \n",
       "\n",
       "                               image_id    x    y    w    h  \n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b  249  355  176   99  \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b  492  391  184  115  "
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#drop the bbox column\n",
    "cctbbox_annotations_df=cctbbox_annotations_df.drop(\"bbox\",axis=1)\n",
    "cctbbox_annotations_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date_captured</th>\n",
       "      <th>file_name</th>\n",
       "      <th>frame_num</th>\n",
       "      <th>height</th>\n",
       "      <th>image_id</th>\n",
       "      <th>location</th>\n",
       "      <th>rights_holder</th>\n",
       "      <th>seq_id</th>\n",
       "      <th>seq_num_frames</th>\n",
       "      <th>width</th>\n",
       "      <th>category_id</th>\n",
       "      <th>dataset</th>\n",
       "      <th>id</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>w</th>\n",
       "      <th>h</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2011-05-13 23:43:18</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>1</td>\n",
       "      <td>1494</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>33</td>\n",
       "      <td>Justin Brown</td>\n",
       "      <td>6f084ccc-5567-11e8-bc84-dca9047ef277</td>\n",
       "      <td>3</td>\n",
       "      <td>2048</td>\n",
       "      <td>19</td>\n",
       "      <td>iwild</td>\n",
       "      <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>249</td>\n",
       "      <td>355</td>\n",
       "      <td>176</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012-03-17 03:48:44</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>2</td>\n",
       "      <td>1494</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>115</td>\n",
       "      <td>Justin Brown</td>\n",
       "      <td>6f12067d-5567-11e8-b3c0-dca9047ef277</td>\n",
       "      <td>3</td>\n",
       "      <td>2048</td>\n",
       "      <td>19</td>\n",
       "      <td>iwild</td>\n",
       "      <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n",
       "      <td>492</td>\n",
       "      <td>391</td>\n",
       "      <td>184</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date_captured                                 file_name  frame_num  \\\n",
       "0  2011-05-13 23:43:18  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg          1   \n",
       "1  2012-03-17 03:48:44  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg          2   \n",
       "\n",
       "   height                              image_id  location rights_holder  \\\n",
       "0    1494  5998cfa4-23d2-11e8-a6a3-ec086b02610b        33  Justin Brown   \n",
       "1    1494  588a679f-23d2-11e8-a6a3-ec086b02610b       115  Justin Brown   \n",
       "\n",
       "                                 seq_id  seq_num_frames  width  category_id  \\\n",
       "0  6f084ccc-5567-11e8-bc84-dca9047ef277               3   2048           19   \n",
       "1  6f12067d-5567-11e8-b3c0-dca9047ef277               3   2048           19   \n",
       "\n",
       "  dataset                                    id    x    y    w    h  \n",
       "0   iwild  2a545520-cbf1-11e8-819c-970a9450cdbc  249  355  176   99  \n",
       "1   iwild  2a54562e-cbf1-11e8-819c-970a9450cdbc  492  391  184  115  "
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#merge images with annotations\n",
    "cctbbox_merged=pd.merge(cctbbox_images_df,cctbbox_annotations_df,on=\"image_id\")\n",
    "#create a column called dataset to know from which one it comes\n",
    "cctbbox_merged.insert(11, \"dataset\", \"iwild\")\n",
    "cctbbox_merged.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "date_captured\n",
      "frame_num\n",
      "height\n",
      "location\n",
      "rights_holder\n",
      "seq_id\n",
      "seq_num_frames\n",
      "width\n",
      "id\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>file_name</th>\n",
       "      <th>image_id</th>\n",
       "      <th>category_id</th>\n",
       "      <th>dataset</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>w</th>\n",
       "      <th>h</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>19</td>\n",
       "      <td>iwild</td>\n",
       "      <td>249</td>\n",
       "      <td>355</td>\n",
       "      <td>176</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>19</td>\n",
       "      <td>iwild</td>\n",
       "      <td>492</td>\n",
       "      <td>391</td>\n",
       "      <td>184</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  file_name  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg   \n",
       "\n",
       "                               image_id  category_id dataset    x    y    w  \\\n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b           19   iwild  249  355  176   \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b           19   iwild  492  391  184   \n",
       "\n",
       "     h  \n",
       "0   99  \n",
       "1  115  "
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#drop all useless columns. Keep: category, image_id, file_name, dataset, x, y, w, h\n",
    "keepids=[\"category_id\",\"image_id\",\"file_name\",\"dataset\",\"x\",\"y\",\"w\",\"h\"]\n",
    "cctbboxcols=cctbbox_merged.columns\n",
    "for c in cctbboxcols:\n",
    "    if c not in keepids:\n",
    "        print(c)\n",
    "        cctbbox_merged=cctbbox_merged.drop(c,axis=1)\n",
    "    \n",
    "cctbbox_merged.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category_id</th>\n",
       "      <th>image_id</th>\n",
       "      <th>file_name</th>\n",
       "      <th>dataset</th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>w</th>\n",
       "      <th>h</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>iwild</td>\n",
       "      <td>249</td>\n",
       "      <td>355</td>\n",
       "      <td>176</td>\n",
       "      <td>99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>19</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n",
       "      <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n",
       "      <td>iwild</td>\n",
       "      <td>492</td>\n",
       "      <td>391</td>\n",
       "      <td>184</td>\n",
       "      <td>115</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   category_id                              image_id  \\\n",
       "0           19  5998cfa4-23d2-11e8-a6a3-ec086b02610b   \n",
       "1           19  588a679f-23d2-11e8-a6a3-ec086b02610b   \n",
       "\n",
       "                                  file_name dataset    x    y    w    h  \n",
       "0  5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg   iwild  249  355  176   99  \n",
       "1  588a679f-23d2-11e8-a6a3-ec086b02610b.jpg   iwild  492  391  184  115  "
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#now organize them as our colleague is expecting them\n",
    "cctbbox_merged=cctbbox_merged[[\"category_id\",\"image_id\",\"file_name\",\"dataset\",\"x\",\"y\",\"w\",\"h\"]]\n",
    "cctbbox_merged.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "cctbbox_merged.to_csv(\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/cctWithBboxesForiWild.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}