Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save meramos/9f1a704dc3a01489836db11d02a267af to your computer and use it in GitHub Desktop.
Save meramos/9f1a704dc3a01489836db11d02a267af to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from PIL import Image"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image</th>\n",
" <th>label</th>\n",
" <th>xmin</th>\n",
" <th>xmax</th>\n",
" <th>ymin</th>\n",
" <th>ymax</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>labeled_data/beecomb</td>\n",
" <td>honey</td>\n",
" <td>2470</td>\n",
" <td>2519</td>\n",
" <td>2129</td>\n",
" <td>2179</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>labeled_data/beecomb</td>\n",
" <td>honey</td>\n",
" <td>2519</td>\n",
" <td>2573</td>\n",
" <td>2128</td>\n",
" <td>2177</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image label xmin xmax ymin ymax\n",
"0 labeled_data/beecomb honey 2470 2519 2129 2179\n",
"1 labeled_data/beecomb honey 2519 2573 2128 2177"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first two rows of the bounding-box annotation table.\n",
"# NOTE(review): `df` is not created in any cell shown in this notebook - TODO add the\n",
"# cell that loads it so the notebook survives Restart Kernel -> Run All.\n",
"df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Side length, in pixels, of the square chunks cut from each image.\n",
"res = 300"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Cut every labelled image into res x res chunks and rewrite each bounding box\n",
"# in the coordinate frame of every chunk that fully contains it.\n",
"#\n",
"# Reads `df` (columns image/label/xmin/xmax/ymin/ymax, where `image` is the image\n",
"# path without its .jpg extension) and `res` (chunk side length in pixels).\n",
"# Writes one JPEG per non-empty chunk under chunks/ and builds `new_df` with one\n",
"# row per (chunk, box) pair. A box that no chunk fully contains is dropped.\n",
"new_coordinates = {\n",
"    \"image\": [],\n",
"    \"label\": [],\n",
"    \"xmin\": [],\n",
"    \"xmax\": [],\n",
"    \"ymin\": [],\n",
"    \"ymax\": []\n",
"}\n",
"\n",
"# Image.save does not create missing directories, so make sure the target exists.\n",
"os.makedirs(\"chunks\", exist_ok=True)\n",
"\n",
"for image_path, boxes in df.groupby(\"image\"):\n",
"\n",
"    # Drop every leading directory so the name can be embedded in a file name.\n",
"    image_name = image_path.rsplit(\"/\", 1)[-1]\n",
"\n",
"    # Copy the pixels into an array inside the context manager so the file\n",
"    # handle is released as soon as the data has been read.\n",
"    with Image.open(image_path + \".jpg\") as im:\n",
"        im_array = np.array(im)\n",
"\n",
"    img_height, img_width = im_array.shape[:2]\n",
"\n",
"    for x in range(0, img_width, res):\n",
"        for y in range(0, img_height, res):\n",
"\n",
"            # A chunk that would run past the right/bottom edge is shifted back so\n",
"            # it stays res x res. The start is clamped to 0 because a negative\n",
"            # start would wrap around when slicing and would also shift the boxes\n",
"            # by the wrong offset; an image smaller than res therefore yields a\n",
"            # single chunk covering the whole image along that axis.\n",
"            x_end = min(x + res, img_width)\n",
"            x_start = max(x_end - res, 0)\n",
"            y_end = min(y + res, img_height)\n",
"            y_start = max(y_end - res, 0)\n",
"\n",
"            # Bounding boxes lying entirely inside the chunk.\n",
"            inside = (\n",
"                (boxes[\"xmin\"] >= x_start) & (boxes[\"xmax\"] <= x_end)\n",
"                & (boxes[\"ymin\"] >= y_start) & (boxes[\"ymax\"] <= y_end)\n",
"            )\n",
"            search_df = boxes.loc[inside]\n",
"\n",
"            # Only keep chunks with bounding boxes.\n",
"            if search_df.empty:\n",
"                continue\n",
"\n",
"            # The file name carries the nominal grid origin (x, y); for shifted\n",
"            # edge chunks this differs from the actual origin (x_start, y_start).\n",
"            chunk_path = \"chunks/chunk_\" + image_name + \"_\" + str(x) + \"_\" + str(y) + \".jpeg\"\n",
"            Image.fromarray(im_array[y_start:y_end, x_start:x_end]).save(chunk_path)\n",
"\n",
"            # Translate the boxes into chunk coordinates one whole column at a time.\n",
"            new_coordinates[\"image\"].extend([chunk_path] * len(search_df))\n",
"            new_coordinates[\"label\"].extend(search_df[\"label\"].tolist())\n",
"            new_coordinates[\"xmin\"].extend((search_df[\"xmin\"] - x_start).tolist())\n",
"            new_coordinates[\"xmax\"].extend((search_df[\"xmax\"] - x_start).tolist())\n",
"            new_coordinates[\"ymin\"].extend((search_df[\"ymin\"] - y_start).tolist())\n",
"            new_coordinates[\"ymax\"].extend((search_df[\"ymax\"] - y_start).tolist())\n",
"\n",
"new_df = pd.DataFrame.from_dict(new_coordinates)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>image</th>\n",
" <th>label</th>\n",
" <th>xmin</th>\n",
" <th>xmax</th>\n",
" <th>ymin</th>\n",
" <th>ymax</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>chunks/chunk_IMG_20190327_073026_900_2700.jpeg</td>\n",
" <td>empty</td>\n",
" <td>261</td>\n",
" <td>297</td>\n",
" <td>47</td>\n",
" <td>83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>chunks/chunk_IMG_20190327_073026_900_2700.jpeg</td>\n",
" <td>empty</td>\n",
" <td>207</td>\n",
" <td>243</td>\n",
" <td>51</td>\n",
" <td>87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>chunks/chunk_IMG_20190327_073026_900_2700.jpeg</td>\n",
" <td>empty</td>\n",
" <td>111</td>\n",
" <td>147</td>\n",
" <td>55</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>chunks/chunk_IMG_20190327_073026_1200_2700.jpeg</td>\n",
" <td>empty</td>\n",
" <td>240</td>\n",
" <td>276</td>\n",
" <td>39</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>chunks/chunk_IMG_20190327_073026_1200_2700.jpeg</td>\n",
" <td>empty</td>\n",
" <td>191</td>\n",
" <td>227</td>\n",
" <td>40</td>\n",
" <td>76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" image label xmin xmax ymin \\\n",
"0 chunks/chunk_IMG_20190327_073026_900_2700.jpeg empty 261 297 47 \n",
"1 chunks/chunk_IMG_20190327_073026_900_2700.jpeg empty 207 243 51 \n",
"2 chunks/chunk_IMG_20190327_073026_900_2700.jpeg empty 111 147 55 \n",
"3 chunks/chunk_IMG_20190327_073026_1200_2700.jpeg empty 240 276 39 \n",
"4 chunks/chunk_IMG_20190327_073026_1200_2700.jpeg empty 191 227 40 \n",
"\n",
" ymax \n",
"0 83 \n",
"1 87 \n",
"2 91 \n",
"3 75 \n",
"4 76 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the chunk-relative bounding-box table.\n",
"new_df.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment