Created
November 1, 2020 21:19
-
-
Save leigh-johnson/293f3380f15c496934e2846ec7f9ad16 to your computer and use it in GitHub Desktop.
Automatic VoTT Bounding Box suggestions with Google Cloud AutoML Vision model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5-final" | |
}, | |
"orig_nbformat": 2, | |
"kernelspec": { | |
"name": "Python 3.8.5 64-bit ('.venv')", | |
"display_name": "Python 3.8.5 64-bit ('.venv')", | |
"metadata": { | |
"interpreter": { | |
"hash": "d275ea291513c142d309cf1729fea5f027504164ee1a26fc0a6595b29a650687" | |
} | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" image xmin ymin xmax ymax \\\n", | |
"0 -0-0uBnUSnc.mp4#t=0 378.079149 219.691963 393.018451 229.739478 \n", | |
"1 -0-0uBnUSnc.mp4#t=0.333333 289.442322 247.933643 308.510313 267.861507 \n", | |
"2 -0-0uBnUSnc.mp4#t=0.333333 320.733384 248.592176 443.941940 272.260692 \n", | |
"3 -0-0uBnUSnc.mp4#t=0.333333 286.215996 249.400038 479.883723 280.570265 \n", | |
"4 -0-0uBnUSnc.mp4#t=0.666667 294.331551 245.000853 313.399542 264.928717 \n", | |
"\n", | |
" label \n", | |
"0 nozzle \n", | |
"1 nozzle \n", | |
"2 print \n", | |
"3 raft \n", | |
"4 nozzle " | |
], | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>image</th>\n <th>xmin</th>\n <th>ymin</th>\n <th>xmax</th>\n <th>ymax</th>\n <th>label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-0-0uBnUSnc.mp4#t=0</td>\n <td>378.079149</td>\n <td>219.691963</td>\n <td>393.018451</td>\n <td>229.739478</td>\n <td>nozzle</td>\n </tr>\n <tr>\n <th>1</th>\n <td>-0-0uBnUSnc.mp4#t=0.333333</td>\n <td>289.442322</td>\n <td>247.933643</td>\n <td>308.510313</td>\n <td>267.861507</td>\n <td>nozzle</td>\n </tr>\n <tr>\n <th>2</th>\n <td>-0-0uBnUSnc.mp4#t=0.333333</td>\n <td>320.733384</td>\n <td>248.592176</td>\n <td>443.941940</td>\n <td>272.260692</td>\n <td>print</td>\n </tr>\n <tr>\n <th>3</th>\n <td>-0-0uBnUSnc.mp4#t=0.333333</td>\n <td>286.215996</td>\n <td>249.400038</td>\n <td>479.883723</td>\n <td>280.570265</td>\n <td>raft</td>\n </tr>\n <tr>\n <th>4</th>\n <td>-0-0uBnUSnc.mp4#t=0.666667</td>\n <td>294.331551</td>\n <td>245.000853</td>\n <td>313.399542</td>\n <td>264.928717</td>\n <td>nozzle</td>\n </tr>\n </tbody>\n</table>\n</div>" | |
}, | |
"metadata": {}, | |
"execution_count": 2 | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"# load VoTT CSV export\n", | |
"# notice: coordinates are absolute\n", | |
"df = pd.read_csv('/home/leigh/datasets/spaghetti/labeled/vott-csv-export/spaghetti_v1-export.csv')\n", | |
"df.head()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 8926 entries, 0 to 8925\nData columns (total 6 columns):\n # Column Non-Null Count Dtype \n--- ------ -------------- ----- \n 0 image 8926 non-null object \n 1 xmin 8926 non-null float64\n 2 ymin 8926 non-null float64\n 3 xmax 8926 non-null float64\n 4 ymax 8926 non-null float64\n 5 label 8926 non-null object \ndtypes: float64(4), object(2)\nmemory usage: 418.5+ KB\n" | |
] | |
} | |
], | |
"source": [ | |
"df.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"finished 0 / 8926\n", | |
"finished 1000 / 8926\n", | |
"...\n" | |
] | |
} | |
], | |
"source": [ | |
"\n", | |
"import cv2\n", | |
"\n", | |
"base_path = '/home/leigh/datasets/spaghetti/labeled/vott-csv-export/'\n", | |
"\n", | |
"LOG_INTERVAL=2000\n", | |
"\n", | |
"# convert absolute coordinates to relative coordinates in [0, 1] range\n", | |
"for index, row in df.iterrows():\n", | |
" if index % LOG_INTERVAL == 0:\n", | |
" print(f'finished {index} / {len(df)}')\n", | |
" filename = row['image_path'].split('/')[-1]\n", | |
" img = cv2.imread(f'{base_path}{filename}')\n", | |
" height, width, channels = img.shape\n", | |
" df.at[index, 'x1_n'] = row['x1'] / width\n", | |
" df.at[index, 'x2_n']= row['x2'] / width \n", | |
" df.at[index, 'y1_n'] = row['y1'] / height\n", | |
" df.at[index, 'y2_n'] = row['y2'] / height\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"# write CSV with columns expected by AutoML Vision\n", | |
"df['none'] = ''\n", | |
"df.to_csv('/home/leigh/datasets/spaghetti/labeled/vott-csv-export/spaghetti_v1-normalized-export.csv', \n", | |
" columns=['set', 'image_path', 'label', 'x1_n', 'y1_n', 'none', 'none', 'x2_n', 'y2_n', 'none', 'none'],\n", | |
" index=False\n", | |
" )\n" | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment