Last active
May 31, 2019 17:39
-
-
Save lesolorzanov/50000c4631d994292e25d8a2dea73dce to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import json\n", | |
"import IPython.display\n", | |
"import os" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"images\n", | |
"info\n", | |
"annotations\n", | |
"categories\n", | |
"images\n", | |
"info\n", | |
"annotations\n", | |
"categories\n" | |
] | |
} | |
], | |
"source": [ | |
"def load_json(path):\n", | |
"    \"\"\"Read the JSON file at `path` and return the decoded object.\"\"\"\n", | |
"    with open(path) as json_file:\n", | |
"        return json.load(json_file)\n", | |
"\n", | |
"\n", | |
"# The annotation files are addressed by hard-coded absolute paths.\n", | |
"f=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/iWildCam_2019_iNat_Idaho/iWildCam_2019_iNat_Idaho.json\"\n", | |
"idahodata = load_json(f)\n", | |
"for p in idahodata:  # iterating the decoded object prints its top-level keys\n", | |
"    print(p)\n", | |
"\n", | |
"f=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/iWildCam_2019_CCT.json\"\n", | |
"cctdata = load_json(f)\n", | |
"for p in cctdata:\n", | |
"    print(p)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"25263\n", | |
"25263\n", | |
"23\n", | |
"196157\n", | |
"196157\n", | |
"23\n" | |
] | |
} | |
], | |
"source": [ | |
"# Wrap each top-level table of both annotation files in its own DataFrame.\n", | |
"idaho_images_df=pd.DataFrame(idahodata[\"images\"])\n", | |
"idaho_annotations_df=pd.DataFrame(idahodata[\"annotations\"])\n", | |
"idaho_categories_df=pd.DataFrame(idahodata[\"categories\"])\n", | |
"\n", | |
"cct_images_df=pd.DataFrame(cctdata[\"images\"])\n", | |
"cct_annotations_df=pd.DataFrame(cctdata[\"annotations\"])\n", | |
"cct_categories_df=pd.DataFrame(cctdata[\"categories\"])\n", | |
"\n", | |
"# Report the row count of every table: the Idaho ones first, then the CCT ones.\n", | |
"for table in (idaho_images_df, idaho_annotations_df, idaho_categories_df,\n", | |
"              cct_images_df, cct_annotations_df, cct_categories_df):\n", | |
"    print(len(table))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"-idaho_images_df\n", | |
"['file_name', 'height', 'id', 'inat_year', 'license', 'rights_holder', 'width']\n", | |
"\n", | |
"\n", | |
"-idaho_annotations_df\n", | |
"['category_id', 'id', 'image_id', 'inat_species']\n", | |
"\n", | |
"\n", | |
"-idaho_categories_df\n", | |
"['id', 'inat_species_list', 'name']\n", | |
"\n", | |
"\n", | |
"-cct_images_df\n", | |
"['date_captured', 'file_name', 'frame_num', 'height', 'id', 'location', 'rights_holder', 'seq_id', 'seq_num_frames', 'width']\n", | |
"\n", | |
"\n", | |
"-cct_annotations_df\n", | |
"['category_id', 'id', 'image_id']\n", | |
"\n", | |
"\n", | |
"-cct_categories_df\n", | |
"['id', 'name']\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Show the column names of every table, one labelled list per DataFrame.\n", | |
"tables=[\n", | |
"    (\"idaho_images_df\", idaho_images_df),\n", | |
"    (\"idaho_annotations_df\", idaho_annotations_df),\n", | |
"    (\"idaho_categories_df\", idaho_categories_df),\n", | |
"    (\"cct_images_df\", cct_images_df),\n", | |
"    (\"cct_annotations_df\", cct_annotations_df),\n", | |
"    (\"cct_categories_df\", cct_categories_df),\n", | |
"]\n", | |
"for label, table in tables:\n", | |
"    print(\"-\"+label)\n", | |
"    s= [str(x) for x in list(table)]  # list(DataFrame) yields its column labels\n", | |
"    print(s);print(\"\\n\")\n", | |
"\n", | |
"# Image columns to keep; the column-dropping cells further down remove everything else.\n", | |
"keepids=['file_name', 'height', 'id', 'width']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Idaho dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Remove, in a single call, every image column that is not listed in keepids.\n", | |
"idahocols=idaho_images_df.columns\n", | |
"idaho_images_df=idaho_images_df.drop([c for c in idahocols if c not in keepids],axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n", | |
" <td>532</td>\n", | |
" <td>1</td>\n", | |
" <td>800</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n", | |
" <td>533</td>\n", | |
" <td>2</td>\n", | |
" <td>800</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height image_id width\n", | |
"0 train_val2017/Mammalia/Marmota flaviventris/6e... 532 1 800\n", | |
"1 train_val2017/Mammalia/Marmota flaviventris/dc... 533 2 800" | |
] | |
}, | |
"execution_count": 47, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Rename the image key to image_id (the column the merge joins on).\n", | |
"# rename() returns a new frame, which is rebound here instead of mutating in place.\n", | |
"idaho_images_df=idaho_images_df.rename(columns={\"id\":\"image_id\"})\n", | |
"idaho_images_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>image_id</th>\n", | |
" <th>inat_species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Marmota flaviventris</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>Marmota flaviventris</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" category_id id image_id inat_species\n", | |
"0 3 1 1 Marmota flaviventris\n", | |
"1 3 2 2 Marmota flaviventris" | |
] | |
}, | |
"execution_count": 48, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"idaho_annotations_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 61, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n", | |
" <td>532</td>\n", | |
" <td>1</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n", | |
" <td>533</td>\n", | |
" <td>2</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height image_id width \\\n", | |
"0 train_val2017/Mammalia/Marmota flaviventris/6e... 532 1 800 \n", | |
"1 train_val2017/Mammalia/Marmota flaviventris/dc... 533 2 800 \n", | |
"\n", | |
" category_id id \n", | |
"0 3 1 \n", | |
"1 3 2 " | |
] | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Join every image row with its annotation on image_id, then discard the\n", | |
"# inat_species column from the result.\n", | |
"idaho_merged=(\n", | |
"    pd.merge(idaho_images_df,idaho_annotations_df,on=\"image_id\")\n", | |
"    .drop(\"inat_species\",axis=1)\n", | |
")\n", | |
"idaho_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>dataset</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n", | |
" <td>532</td>\n", | |
" <td>1</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>idaho</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n", | |
" <td>533</td>\n", | |
" <td>2</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>idaho</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height image_id width \\\n", | |
"0 train_val2017/Mammalia/Marmota flaviventris/6e... 532 1 800 \n", | |
"1 train_val2017/Mammalia/Marmota flaviventris/dc... 533 2 800 \n", | |
"\n", | |
" category_id id dataset \n", | |
"0 3 1 idaho \n", | |
"1 3 2 idaho " | |
] | |
}, | |
"execution_count": 70, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Tag every row with its source dataset. DataFrame.insert raises ValueError when the\n", | |
"# column already exists, so the guard keeps this cell safe to run more than once.\n", | |
"if \"dataset\" not in idaho_merged.columns:\n", | |
"    idaho_merged.insert(6, \"dataset\", \"idaho\")\n", | |
"idaho_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>dataset</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n", | |
" <td>532</td>\n", | |
" <td>1</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>idaho</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n", | |
" <td>533</td>\n", | |
" <td>2</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>idaho</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height image_id width \\\n", | |
"0 train_val2017/Mammalia/Marmota flaviventris/6e... 532 1 800 \n", | |
"1 train_val2017/Mammalia/Marmota flaviventris/dc... 533 2 800 \n", | |
"\n", | |
" category_id id dataset \n", | |
"0 3 1 idaho \n", | |
"1 3 2 idaho " | |
] | |
}, | |
"execution_count": 127, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Only the last expression of a cell is displayed, so the row count is printed explicitly.\n", | |
"print(len(idaho_merged))\n", | |
"idaho_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# CCT dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Work out which image columns are not listed in keepids, print their names,\n", | |
"# then remove them all in a single call.\n", | |
"cctcols=cct_images_df.columns\n", | |
"unwanted=[c for c in cctcols if c not in keepids]\n", | |
"for c in unwanted:\n", | |
"    print(c)\n", | |
"cct_images_df=cct_images_df.drop(unwanted,axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 54, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1494</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>2048</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1494</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>2048</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg 1494 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b.jpg 1494 \n", | |
"\n", | |
" image_id width \n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b 2048 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b 2048 " | |
] | |
}, | |
"execution_count": 54, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Rename the image key to image_id (the column the merge joins on).\n", | |
"# rename() returns a new frame, which is rebound here instead of mutating in place.\n", | |
"cct_images_df=cct_images_df.rename(columns={\"id\":\"image_id\"})\n", | |
"cct_images_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>image_id</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>19</td>\n", | |
" <td>2a545480-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>19</td>\n", | |
" <td>2a5455b6-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" category_id id \\\n", | |
"0 19 2a545480-cbf1-11e8-819c-970a9450cdbc \n", | |
"1 19 2a5455b6-cbf1-11e8-819c-970a9450cdbc \n", | |
"\n", | |
" image_id \n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b " | |
] | |
}, | |
"execution_count": 52, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cct_annotations_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"cct_merged=pd.merge(cct_images_df,cct_annotations_df,on=\"image_id\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"collapsed": false, | |
"hide_input": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1494</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>2048</td>\n", | |
" <td>19</td>\n", | |
" <td>2a545480-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1494</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>2048</td>\n", | |
" <td>19</td>\n", | |
" <td>2a5455b6-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg 1494 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b.jpg 1494 \n", | |
"\n", | |
" image_id width category_id \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b 2048 19 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b 2048 19 \n", | |
"\n", | |
" id \n", | |
"0 2a545480-cbf1-11e8-819c-970a9450cdbc \n", | |
"1 2a5455b6-cbf1-11e8-819c-970a9450cdbc " | |
] | |
}, | |
"execution_count": 67, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cct_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Tag every row with its source dataset. DataFrame.insert raises ValueError when the\n", | |
"# column already exists, so the guard keeps this cell safe to run more than once.\n", | |
"if \"dataset\" not in cct_merged.columns:\n", | |
"    cct_merged.insert(6, \"dataset\", \"cct\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 128, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>dataset</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1494</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>2048</td>\n", | |
" <td>19</td>\n", | |
" <td>2a545480-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>cct</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1494</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>2048</td>\n", | |
" <td>19</td>\n", | |
" <td>2a5455b6-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>cct</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg 1494 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b.jpg 1494 \n", | |
"\n", | |
" image_id width category_id \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b 2048 19 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b 2048 19 \n", | |
"\n", | |
" id dataset \n", | |
"0 2a545480-cbf1-11e8-819c-970a9450cdbc cct \n", | |
"1 2a5455b6-cbf1-11e8-819c-970a9450cdbc cct " | |
] | |
}, | |
"execution_count": 128, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cct_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Append them" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 129, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>dataset</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/6e...</td>\n", | |
" <td>532</td>\n", | |
" <td>1</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>idaho</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>train_val2017/Mammalia/Marmota flaviventris/dc...</td>\n", | |
" <td>533</td>\n", | |
" <td>2</td>\n", | |
" <td>800</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>idaho</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name height image_id width \\\n", | |
"0 train_val2017/Mammalia/Marmota flaviventris/6e... 532 1 800 \n", | |
"1 train_val2017/Mammalia/Marmota flaviventris/dc... 533 2 800 \n", | |
"\n", | |
" category_id id dataset \n", | |
"0 3 1 idaho \n", | |
"1 3 2 idaho " | |
] | |
}, | |
"execution_count": 129, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Stack the Idaho rows on top of the CCT rows with a fresh 0..n-1 index.\n", | |
"# pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.0.\n", | |
"extra_df=pd.concat([idaho_merged, cct_merged], ignore_index=True)\n", | |
"extra_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Get values corresponding to complementary classes\n", | |
" This is from the Idaho dataset only!" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Category ids wanted from the Idaho data; they are stored there as numbers, not strings.\n", | |
"idaho_needed=[2,5,6,7,9,12,15,20,21,22]\n", | |
"# Keep only the merged rows whose category is in that list and write them to CSV without the index.\n", | |
"wanted_rows=idaho_merged[\"category_id\"].isin(idaho_needed)\n", | |
"try_anno_from_these=idaho_merged[wanted_rows]\n", | |
"try_anno_from_these.to_csv(\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/idaho_necessary.csv\",index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## So, bad news, this dataset is messy AF.\n", | |
"\n", | |
"CCT bboxes are actually the bounding boxes of what they call iWild. CCT without bboxes is what they call CCT big, and CCT small has no JSON or CSV. Basically, CCT small is useless.\n", | |
"\n", | |
"bboxes are of the type: [x, y, width, height]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"images\n", | |
"info\n", | |
"annotations\n", | |
"categories\n" | |
] | |
} | |
], | |
"source": [ | |
"# Parse the CCT bounding-box annotation file.\n", | |
"f=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/iWildCam_2019_CCT_Bboxes.json\"\n", | |
"with open(f) as json_file:\n", | |
"    cctbboxdata = json.load(json_file)\n", | |
"\n", | |
"# Print the top-level keys once the file has been closed.\n", | |
"for p in cctbboxdata:\n", | |
"    print(p)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 95, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"-cctbbox_images_df\n", | |
"['date_captured', 'file_name', 'frame_num', 'height', 'id', 'location', 'rights_holder', 'seq_id', 'seq_num_frames', 'width']\n", | |
"\n", | |
"\n", | |
"-cctbbox_annotations_df\n", | |
"['bbox', 'category_id', 'id', 'image_id']\n", | |
"\n", | |
"\n", | |
"-cctbbox_categories_df\n", | |
"['id', 'name']\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# One DataFrame per top-level table of the bounding-box annotation file.\n", | |
"cctbbox_images_df=pd.DataFrame(cctbboxdata[\"images\"])\n", | |
"cctbbox_annotations_df=pd.DataFrame(cctbboxdata[\"annotations\"])\n", | |
"cctbbox_categories_df=pd.DataFrame(cctbboxdata[\"categories\"])\n", | |
"\n", | |
"# Show the column names of each table, one labelled list per DataFrame.\n", | |
"bbox_tables=[\n", | |
"    (\"cctbbox_images_df\", cctbbox_images_df),\n", | |
"    (\"cctbbox_annotations_df\", cctbbox_annotations_df),\n", | |
"    (\"cctbbox_categories_df\", cctbbox_categories_df),\n", | |
"]\n", | |
"for label, table in bbox_tables:\n", | |
"    print(\"-\"+label)\n", | |
"    s= [str(x) for x in list(table)]  # list(DataFrame) yields its column labels\n", | |
"    print(s);print(\"\\n\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 119, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date_captured</th>\n", | |
" <th>file_name</th>\n", | |
" <th>frame_num</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>location</th>\n", | |
" <th>rights_holder</th>\n", | |
" <th>seq_id</th>\n", | |
" <th>seq_num_frames</th>\n", | |
" <th>width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2011-05-13 23:43:18</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1</td>\n", | |
" <td>1494</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>33</td>\n", | |
" <td>Justin Brown</td>\n", | |
" <td>6f084ccc-5567-11e8-bc84-dca9047ef277</td>\n", | |
" <td>3</td>\n", | |
" <td>2048</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date_captured file_name frame_num \\\n", | |
"0 2011-05-13 23:43:18 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg 1 \n", | |
"\n", | |
" height image_id location rights_holder \\\n", | |
"0 1494 5998cfa4-23d2-11e8-a6a3-ec086b02610b 33 Justin Brown \n", | |
"\n", | |
" seq_id seq_num_frames width \n", | |
"0 6f084ccc-5567-11e8-bc84-dca9047ef277 3 2048 " | |
] | |
}, | |
"execution_count": 119, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cctbbox_images_df=cctbbox_images_df.rename(columns={\"id\":\"image_id\"})\n", | |
"cctbbox_images_df.head(1)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>bbox</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>image_id</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>[499.2, 711.68, 353.28, 199.68]</td>\n", | |
" <td>19</td>\n", | |
" <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>[985.6, 783.36, 368.64, 230.4]</td>\n", | |
" <td>19</td>\n", | |
" <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" bbox category_id \\\n", | |
"0 [499.2, 711.68, 353.28, 199.68] 19 \n", | |
"1 [985.6, 783.36, 368.64, 230.4] 19 \n", | |
"\n", | |
" id image_id \n", | |
"0 2a545520-cbf1-11e8-819c-970a9450cdbc 5998cfa4-23d2-11e8-a6a3-ec086b02610b \n", | |
"1 2a54562e-cbf1-11e8-819c-970a9450cdbc 588a679f-23d2-11e8-a6a3-ec086b02610b " | |
] | |
}, | |
"execution_count": 105, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"cctbbox_annotations_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 96, | |
"metadata": { | |
"collapsed": false, | |
"hide_input": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#this was just to check that all images with bboxes are actually real files in the iwild folder\n", | |
"#ps: yes, they are\n", | |
"# arr=cctbbox_images_df[\"file_name\"].values\n", | |
"# location=\"/media/leslie/CBAEXT4/iwildcam_2019/train_images/\"\n", | |
"# existsIniWild=[]\n", | |
"# doesnotexistiniWild=[]\n", | |
"# for im in arr:\n", | |
"# exists = os.path.isfile(location+str(im))\n", | |
"# if(exists):\n", | |
"# existsIniWild.append(im)\n", | |
"# else:\n", | |
"# doesnotexistiniWild.append(im)\n", | |
"\n", | |
"# print(len(existsIniWild))\n", | |
"# print(len(doesnotexistiniWild))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"list" | |
] | |
}, | |
"execution_count": 107, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"type(cctbbox_annotations_df.iloc[0][\"bbox\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#convert the bbox list into different columns\n", | |
"def bboxtocols(row,num,scale=2):\n", | |
"    \"\"\"Return component `num` of row[\"bbox\"], floor-divided by `scale`, as an int.\n", | |
"\n", | |
"    row   -- mapping or DataFrame row whose \"bbox\" entry is a sequence of numbers\n", | |
"             (this notebook describes bboxes as [x, y, width, height]).\n", | |
"    num   -- position of the wanted component inside the bbox sequence.\n", | |
"    scale -- divisor applied before the int conversion; the default of 2 keeps the\n", | |
"             halving that used to be hard-coded here.\n", | |
"             NOTE(review): presumably the images used downstream are half the\n", | |
"             annotated resolution -- confirm.\n", | |
"\n", | |
"    Raises IndexError when `num` is outside the bbox sequence.\n", | |
"    \"\"\"\n", | |
"    return int(row[\"bbox\"][num]//scale)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 115, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>bbox</th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>image_id</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>w</th>\n", | |
" <th>h</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>[499.2, 711.68, 353.28, 199.68]</td>\n", | |
" <td>19</td>\n", | |
" <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>249</td>\n", | |
" <td>355</td>\n", | |
" <td>176</td>\n", | |
" <td>99</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>[985.6, 783.36, 368.64, 230.4]</td>\n", | |
" <td>19</td>\n", | |
" <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>492</td>\n", | |
" <td>391</td>\n", | |
" <td>184</td>\n", | |
" <td>115</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" bbox category_id \\\n", | |
"0 [499.2, 711.68, 353.28, 199.68] 19 \n", | |
"1 [985.6, 783.36, 368.64, 230.4] 19 \n", | |
"\n", | |
" id image_id \\\n", | |
"0 2a545520-cbf1-11e8-819c-970a9450cdbc 5998cfa4-23d2-11e8-a6a3-ec086b02610b \n", | |
"1 2a54562e-cbf1-11e8-819c-970a9450cdbc 588a679f-23d2-11e8-a6a3-ec086b02610b \n", | |
"\n", | |
" x y w h \n", | |
"0 249 355 176 99 \n", | |
"1 492 391 184 115 " | |
] | |
}, | |
"execution_count": 115, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#convert the bbox list into different columns\n", | |
"# Component i of each bbox goes into the i-th of these columns, via bboxtocols.\n", | |
"for position, column in enumerate([\"x\", \"y\", \"w\", \"h\"]):\n", | |
"    cctbbox_annotations_df[column]=cctbbox_annotations_df.apply(lambda row: bboxtocols(row,position), axis=1)\n", | |
"cctbbox_annotations_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 117, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>category_id</th>\n", | |
" <th>id</th>\n", | |
" <th>image_id</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>w</th>\n", | |
" <th>h</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>19</td>\n", | |
" <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>249</td>\n", | |
" <td>355</td>\n", | |
" <td>176</td>\n", | |
" <td>99</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>19</td>\n", | |
" <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>492</td>\n", | |
" <td>391</td>\n", | |
" <td>184</td>\n", | |
" <td>115</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" category_id id \\\n", | |
"0 19 2a545520-cbf1-11e8-819c-970a9450cdbc \n", | |
"1 19 2a54562e-cbf1-11e8-819c-970a9450cdbc \n", | |
"\n", | |
" image_id x y w h \n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b 249 355 176 99 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b 492 391 184 115 " | |
] | |
}, | |
"execution_count": 117, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#drop the bbox column now that x, y, w, h exist as separate columns\n", | |
"#errors=\"ignore\" keeps this cell re-runnable: a second run no longer fails because bbox is already gone\n", | |
"cctbbox_annotations_df=cctbbox_annotations_df.drop(\"bbox\",axis=1,errors=\"ignore\")\n", | |
"cctbbox_annotations_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date_captured</th>\n", | |
" <th>file_name</th>\n", | |
" <th>frame_num</th>\n", | |
" <th>height</th>\n", | |
" <th>image_id</th>\n", | |
" <th>location</th>\n", | |
" <th>rights_holder</th>\n", | |
" <th>seq_id</th>\n", | |
" <th>seq_num_frames</th>\n", | |
" <th>width</th>\n", | |
" <th>category_id</th>\n", | |
" <th>dataset</th>\n", | |
" <th>id</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>w</th>\n", | |
" <th>h</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2011-05-13 23:43:18</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>1</td>\n", | |
" <td>1494</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>33</td>\n", | |
" <td>Justin Brown</td>\n", | |
" <td>6f084ccc-5567-11e8-bc84-dca9047ef277</td>\n", | |
" <td>3</td>\n", | |
" <td>2048</td>\n", | |
" <td>19</td>\n", | |
" <td>iwild</td>\n", | |
" <td>2a545520-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>249</td>\n", | |
" <td>355</td>\n", | |
" <td>176</td>\n", | |
" <td>99</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2012-03-17 03:48:44</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>2</td>\n", | |
" <td>1494</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>115</td>\n", | |
" <td>Justin Brown</td>\n", | |
" <td>6f12067d-5567-11e8-b3c0-dca9047ef277</td>\n", | |
" <td>3</td>\n", | |
" <td>2048</td>\n", | |
" <td>19</td>\n", | |
" <td>iwild</td>\n", | |
" <td>2a54562e-cbf1-11e8-819c-970a9450cdbc</td>\n", | |
" <td>492</td>\n", | |
" <td>391</td>\n", | |
" <td>184</td>\n", | |
" <td>115</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date_captured file_name frame_num \\\n", | |
"0 2011-05-13 23:43:18 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg 1 \n", | |
"1 2012-03-17 03:48:44 588a679f-23d2-11e8-a6a3-ec086b02610b.jpg 2 \n", | |
"\n", | |
" height image_id location rights_holder \\\n", | |
"0 1494 5998cfa4-23d2-11e8-a6a3-ec086b02610b 33 Justin Brown \n", | |
"1 1494 588a679f-23d2-11e8-a6a3-ec086b02610b 115 Justin Brown \n", | |
"\n", | |
" seq_id seq_num_frames width category_id \\\n", | |
"0 6f084ccc-5567-11e8-bc84-dca9047ef277 3 2048 19 \n", | |
"1 6f12067d-5567-11e8-b3c0-dca9047ef277 3 2048 19 \n", | |
"\n", | |
" dataset id x y w h \n", | |
"0 iwild 2a545520-cbf1-11e8-819c-970a9450cdbc 249 355 176 99 \n", | |
"1 iwild 2a54562e-cbf1-11e8-819c-970a9450cdbc 492 391 184 115 " | |
] | |
}, | |
"execution_count": 123, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#join every annotation to its image record through the shared image_id key\n", | |
"cctbbox_merged=cctbbox_images_df.merge(cctbbox_annotations_df,on=\"image_id\")\n", | |
"#tag each row with the dataset it comes from; position 11 places the new column right after category_id\n", | |
"cctbbox_merged.insert(loc=11, column=\"dataset\", value=\"iwild\")\n", | |
"cctbbox_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 124, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"date_captured\n", | |
"frame_num\n", | |
"height\n", | |
"location\n", | |
"rights_holder\n", | |
"seq_id\n", | |
"seq_num_frames\n", | |
"width\n", | |
"id\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file_name</th>\n", | |
" <th>image_id</th>\n", | |
" <th>category_id</th>\n", | |
" <th>dataset</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>w</th>\n", | |
" <th>h</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>19</td>\n", | |
" <td>iwild</td>\n", | |
" <td>249</td>\n", | |
" <td>355</td>\n", | |
" <td>176</td>\n", | |
" <td>99</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>19</td>\n", | |
" <td>iwild</td>\n", | |
" <td>492</td>\n", | |
" <td>391</td>\n", | |
" <td>184</td>\n", | |
" <td>115</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file_name \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b.jpg \n", | |
"\n", | |
" image_id category_id dataset x y w \\\n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b 19 iwild 249 355 176 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b 19 iwild 492 391 184 \n", | |
"\n", | |
" h \n", | |
"0 99 \n", | |
"1 115 " | |
] | |
}, | |
"execution_count": 124, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#keep only the columns needed downstream: category_id, image_id, file_name, dataset, x, y, w, h\n", | |
"keepids=[\"category_id\",\"image_id\",\"file_name\",\"dataset\",\"x\",\"y\",\"w\",\"h\"]\n", | |
"cctbboxcols=cctbbox_merged.columns\n", | |
"#collect every column that is not in keepids, print each name, then drop them all in one call\n", | |
"dropped_cols=[col for col in cctbboxcols if col not in keepids]\n", | |
"for col in dropped_cols:\n", | |
"    print(col)\n", | |
"cctbbox_merged=cctbbox_merged.drop(dropped_cols,axis=1)\n", | |
"\n", | |
"cctbbox_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 125, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>category_id</th>\n", | |
" <th>image_id</th>\n", | |
" <th>file_name</th>\n", | |
" <th>dataset</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>w</th>\n", | |
" <th>h</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>19</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>iwild</td>\n", | |
" <td>249</td>\n", | |
" <td>355</td>\n", | |
" <td>176</td>\n", | |
" <td>99</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>19</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b</td>\n", | |
" <td>588a679f-23d2-11e8-a6a3-ec086b02610b.jpg</td>\n", | |
" <td>iwild</td>\n", | |
" <td>492</td>\n", | |
" <td>391</td>\n", | |
" <td>184</td>\n", | |
" <td>115</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" category_id image_id \\\n", | |
"0 19 5998cfa4-23d2-11e8-a6a3-ec086b02610b \n", | |
"1 19 588a679f-23d2-11e8-a6a3-ec086b02610b \n", | |
"\n", | |
" file_name dataset x y w h \n", | |
"0 5998cfa4-23d2-11e8-a6a3-ec086b02610b.jpg iwild 249 355 176 99 \n", | |
"1 588a679f-23d2-11e8-a6a3-ec086b02610b.jpg iwild 492 391 184 115 " | |
] | |
}, | |
"execution_count": 125, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#put the columns in the order our colleague is expecting\n", | |
"expected_order=[\"category_id\",\"image_id\",\"file_name\",\"dataset\",\"x\",\"y\",\"w\",\"h\"]\n", | |
"cctbbox_merged=cctbbox_merged[expected_order]\n", | |
"cctbbox_merged.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 126, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"#write the merged table to disk as csv\n", | |
"cct_csv_path=\"/home/leslie/Documents/Uppsala/courses/DLreadingCourse/iwildstuff/cctWithBboxesForiWild.csv\"\n", | |
"cctbbox_merged.to_csv(cct_csv_path)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment