Created
August 11, 2019 23:20
-
-
Save meramos/9f1a704dc3a01489836db11d02a267af to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"from PIL import Image\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>image</th>\n", | |
" <th>label</th>\n", | |
" <th>xmin</th>\n", | |
" <th>xmax</th>\n", | |
" <th>ymin</th>\n", | |
" <th>ymax</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>labeled_data/beecomb</td>\n", | |
" <td>honey</td>\n", | |
" <td>2470</td>\n", | |
" <td>2519</td>\n", | |
" <td>2129</td>\n", | |
" <td>2179</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>labeled_data/beecomb</td>\n", | |
" <td>honey</td>\n", | |
" <td>2519</td>\n", | |
" <td>2573</td>\n", | |
" <td>2128</td>\n", | |
" <td>2177</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" image label xmin xmax ymin ymax\n", | |
"0 labeled_data/beecomb honey 2470 2519 2129 2179\n", | |
"1 labeled_data/beecomb honey 2519 2573 2128 2177" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"res = 300" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Cut every annotated image into res x res chunks and re-express each bounding\n", | |
"# box in the pixel coordinates of the chunk that fully contains it.\n", | |
"#\n", | |
"# Inputs taken from earlier cells:\n", | |
"#   res - side length of a chunk, in pixels\n", | |
"#   df  - one row per bounding box, with the columns image, label, xmin, xmax,\n", | |
"#         ymin, ymax; `image` is the image path without its \".jpg\" extension\n", | |
"# NOTE(review): no cell shown in this notebook creates `df`; it has to be loaded\n", | |
"# before this cell can run on a fresh kernel.\n", | |
"#\n", | |
"# Side effects: writes one JPEG per kept chunk under \"chunks/\" (this cell does\n", | |
"# not create that directory) and builds `new_df` with the adjusted boxes.\n", | |
"new_coordinates = {\n", | |
"    \"image\": [],\n", | |
"    \"label\": [],\n", | |
"    \"xmin\": [],\n", | |
"    \"xmax\": [],\n", | |
"    \"ymin\": [],\n", | |
"    \"ymax\": []\n", | |
"}\n", | |
"\n", | |
"for image_path, boxes in df.groupby(\"image\"):\n", | |
"\n", | |
"    # text after the first '/' of the path; used to name the chunk files\n", | |
"    image_name = image_path[image_path.find('/')+1:]\n", | |
"\n", | |
"    # copy the pixels into an array, then let the context manager close the file\n", | |
"    with Image.open(image_path+\".jpg\") as im:\n", | |
"        im_array = np.array(im)\n", | |
"\n", | |
"    img_height, img_width = im_array.shape[:2]\n", | |
"\n", | |
"    for x in range(0, img_width, res):\n", | |
"\n", | |
"        # a chunk that would run past the right edge is shifted back so that it\n", | |
"        # is still res wide; the start is clamped at 0 so that an image narrower\n", | |
"        # than res is never sliced with a negative index\n", | |
"        x_end = min(x + res, img_width)\n", | |
"        x_start = max(x_end - res, 0)\n", | |
"\n", | |
"        for y in range(0, img_height, res):\n", | |
"\n", | |
"            # same treatment for the bottom edge\n", | |
"            y_end = min(y + res, img_height)\n", | |
"            y_start = max(y_end - res, 0)\n", | |
"\n", | |
"            # bounding boxes lying entirely inside the chunk; a box that crosses\n", | |
"            # the chunk border is not selected for this chunk\n", | |
"            search_df = boxes.loc[(boxes['xmin'] >= x_start) & (boxes['xmax'] <= x_end)\n", | |
"                                  & (boxes['ymin'] >= y_start) & (boxes['ymax'] <= y_end)]\n", | |
"\n", | |
"            # only keep chunks with bounding boxes\n", | |
"            if search_df.empty:\n", | |
"                continue\n", | |
"\n", | |
"            # save chunk; the file name carries the grid position (x, y)\n", | |
"            chunk_path = \"chunks/chunk_\"+image_name+\"_\"+str(x)+\"_\"+str(y)+\".jpeg\"\n", | |
"            Image.fromarray(im_array[y_start:y_end, x_start:x_end]).save(chunk_path)\n", | |
"\n", | |
"            # shift each box so that it is relative to the chunk's top-left corner\n", | |
"            for _, row in search_df.iterrows():\n", | |
"                new_coordinates[\"image\"].append(chunk_path)\n", | |
"                new_coordinates[\"label\"].append(row[\"label\"])\n", | |
"                new_coordinates[\"xmin\"].append(row[\"xmin\"] - x_start)\n", | |
"                new_coordinates[\"xmax\"].append(row[\"xmax\"] - x_start)\n", | |
"                new_coordinates[\"ymin\"].append(row[\"ymin\"] - y_start)\n", | |
"                new_coordinates[\"ymax\"].append(row[\"ymax\"] - y_start)\n", | |
"\n", | |
"new_df = pd.DataFrame.from_dict(new_coordinates)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>image</th>\n", | |
" <th>label</th>\n", | |
" <th>xmin</th>\n", | |
" <th>xmax</th>\n", | |
" <th>ymin</th>\n", | |
" <th>ymax</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>chunks/chunk_IMG_20190327_073026_900_2700.jpeg</td>\n", | |
" <td>empty</td>\n", | |
" <td>261</td>\n", | |
" <td>297</td>\n", | |
" <td>47</td>\n", | |
" <td>83</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>chunks/chunk_IMG_20190327_073026_900_2700.jpeg</td>\n", | |
" <td>empty</td>\n", | |
" <td>207</td>\n", | |
" <td>243</td>\n", | |
" <td>51</td>\n", | |
" <td>87</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>chunks/chunk_IMG_20190327_073026_900_2700.jpeg</td>\n", | |
" <td>empty</td>\n", | |
" <td>111</td>\n", | |
" <td>147</td>\n", | |
" <td>55</td>\n", | |
" <td>91</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>chunks/chunk_IMG_20190327_073026_1200_2700.jpeg</td>\n", | |
" <td>empty</td>\n", | |
" <td>240</td>\n", | |
" <td>276</td>\n", | |
" <td>39</td>\n", | |
" <td>75</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>chunks/chunk_IMG_20190327_073026_1200_2700.jpeg</td>\n", | |
" <td>empty</td>\n", | |
" <td>191</td>\n", | |
" <td>227</td>\n", | |
" <td>40</td>\n", | |
" <td>76</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" image label xmin xmax ymin \\\n", | |
"0 chunks/chunk_IMG_20190327_073026_900_2700.jpeg empty 261 297 47 \n", | |
"1 chunks/chunk_IMG_20190327_073026_900_2700.jpeg empty 207 243 51 \n", | |
"2 chunks/chunk_IMG_20190327_073026_900_2700.jpeg empty 111 147 55 \n", | |
"3 chunks/chunk_IMG_20190327_073026_1200_2700.jpeg empty 240 276 39 \n", | |
"4 chunks/chunk_IMG_20190327_073026_1200_2700.jpeg empty 191 227 40 \n", | |
"\n", | |
" ymax \n", | |
"0 83 \n", | |
"1 87 \n", | |
"2 91 \n", | |
"3 75 \n", | |
"4 76 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new_df.head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment