Skip to content

Instantly share code, notes, and snippets.

@kmader
Created December 11, 2016 09:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmader/53b72baa8c4bd5c08fa24f5147661b91 to your computer and use it in GitHub Desktop.
Save kmader/53b72baa8c4bd5c08fa24f5147661b91 to your computer and use it in GitHub Desktop.
A python script for making a submission to the InClass Kaggle competition for Micro and Nano Tomography
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd # for handling tables and csv file input / output\n",
"import numpy as np # for dealing with arrays and linear algebra operations\n",
"from skimage.io import imread # for reading images\n",
"from glob import glob # for listing files\n",
"import os # for dealing with paths"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Testing Data\n",
"The testing data should be in a subfolder called `testing/`; the image files in it can be listed with the `glob` function."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['testing/000.png',\n",
" 'testing/001.png',\n",
" 'testing/002.png',\n",
" 'testing/003.png',\n",
" 'testing/004.png',\n",
" 'testing/005.png',\n",
" 'testing/006.png',\n",
" 'testing/007.png',\n",
" 'testing/008.png']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# glob returns paths in arbitrary, filesystem-dependent order; sort them so the\n",
"# file list (and the row order of everything built from it) is reproducible\n",
"test_files = sorted(glob('testing/*.png'))\n",
"test_files"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Read in the Image Data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" <th>image</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>testing/000.png</td>\n",
" <td>[[178, 107, 130, 255, 255, 174, 255, 255, 255,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>testing/001.png</td>\n",
" <td>[[255, 62, 60, 189, 208, 234, 255, 255, 233, 2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>testing/002.png</td>\n",
" <td>[[213, 184, 223, 255, 255, 255, 255, 255, 255,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>testing/003.png</td>\n",
" <td>[[138, 122, 28, 38, 93, 174, 235, 161, 225, 25...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>testing/004.png</td>\n",
" <td>[[220, 228, 255, 250, 205, 195, 190, 178, 170,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>testing/005.png</td>\n",
" <td>[[235, 248, 255, 214, 145, 137, 169, 255, 255,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>testing/006.png</td>\n",
" <td>[[236, 255, 190, 236, 255, 223, 250, 249, 228,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>testing/007.png</td>\n",
" <td>[[255, 240, 219, 255, 255, 255, 255, 255, 255,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>testing/008.png</td>\n",
" <td>[[255, 255, 255, 245, 225, 227, 255, 255, 255,...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" filename image\n",
"0 testing/000.png [[178, 107, 130, 255, 255, 174, 255, 255, 255,...\n",
"1 testing/001.png [[255, 62, 60, 189, 208, 234, 255, 255, 233, 2...\n",
"2 testing/002.png [[213, 184, 223, 255, 255, 255, 255, 255, 255,...\n",
"3 testing/003.png [[138, 122, 28, 38, 93, 174, 235, 161, 225, 25...\n",
"4 testing/004.png [[220, 228, 255, 250, 205, 195, 190, 178, 170,...\n",
"5 testing/005.png [[235, 248, 255, 214, 145, 137, 169, 255, 255,...\n",
"6 testing/006.png [[236, 255, 190, 236, 255, 223, 250, 249, 228,...\n",
"7 testing/007.png [[255, 240, 219, 255, 255, 255, 255, 255, 255,...\n",
"8 testing/008.png [[255, 255, 255, 245, 225, 227, 255, 255, 255,..."
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one row per test file: the path plus the pixel array read from it\n",
"file_df = (\n",
"    pd.DataFrame(test_files, columns=['filename'])\n",
"    .assign(image=lambda paths_df: paths_df['filename'].map(imread))\n",
")\n",
"file_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convert Images to Submission Format\n",
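"The `image_to_submission` function turns an image into a table with one row per pixel (a `pix_id` such as `000_105_034`, built from the image name and the pixel's x and y position, and its `intensity`) so that it can be easily compared by the Kaggle site."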
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>pix_id</th>\n",
" <th>intensity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4457</th>\n",
" <td>0001_105_034</td>\n",
" <td>255</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5048</th>\n",
" <td>0001_056_039</td>\n",
" <td>241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1517</th>\n",
" <td>0001_109_011</td>\n",
" <td>237</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" pix_id intensity\n",
"4457 0001_105_034 255\n",
"5048 0001_056_039 241\n",
"1517 0001_109_011 237"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def image_to_submission(img_arr, img_id):\n",
"    \"\"\"Turn a 2D image into a per-pixel submission table.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    img_arr : 2D numpy array of pixel intensities, indexed as [row, column].\n",
"    img_id : path or filename of the image; only its base name without the\n",
"        extension is used as the prefix of every pix_id.\n",
"\n",
"    Returns\n",
"    -------\n",
"    DataFrame with one row per pixel and the columns 'pix_id' and 'intensity'.\n",
"    A pix_id is the image name, the x (column) position and the y (row)\n",
"    position joined by underscores, with both positions zero-padded to three\n",
"    digits, e.g. '000_105_034'.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError if img_arr is not 2-dimensional.\n",
"    \"\"\"\n",
"    if img_arr.ndim != 2:\n",
"        raise ValueError('expected a 2D image, got shape %s' % (img_arr.shape,))\n",
"    n_rows, n_cols = img_arr.shape\n",
"    # x runs along the columns and y along the rows, so both grids have the same\n",
"    # (n_rows, n_cols) shape as the image and flatten in the same order as it;\n",
"    # passing the sizes the other way round only lines up for square images\n",
"    xx, yy = np.meshgrid(np.arange(n_cols), np.arange(n_rows))\n",
"    # combine them into an array with one (x, y, intensity) row per pixel\n",
"    xy_img_arr = np.stack([xx.ravel().astype(np.int16), yy.ravel().astype(np.int16), img_arr.ravel()], 1)\n",
"    # make the array into a datatable\n",
"    xy_df = pd.DataFrame(xy_img_arr, columns = ['x', 'y', 'intensity'])\n",
"    image_id = os.path.splitext(os.path.split(img_id)[1])[0] # take just the filename\n",
"    # build the ids in one pass instead of a slow row-wise DataFrame.apply\n",
"    xy_df['pix_id'] = ['%s_%03d_%03d' % (image_id, c_x, c_y) for c_x, c_y in zip(xy_df['x'], xy_df['y'])]\n",
"    return xy_df[['pix_id', 'intensity']]\n",
"# preview three randomly chosen pixel rows of the first test image\n",
"image_to_submission(file_df['image'].iloc[0], '0001.jpg').sample(n=3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Combine all the Submissions\n",
"Here we convert every image to the submission format and combine the results into a single csv file, which can then be uploaded to the site."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def all_img_to_submission(in_file_df, image_key = 'image'):\n",
"    \"\"\"Stack the per-pixel submission tables of every row of in_file_df.\n",
"\n",
"    For each row, the entry in the `image_key` column and the entry in the\n",
"    'filename' column are passed to image_to_submission; the resulting tables\n",
"    are concatenated in row order and returned as one DataFrame.\n",
"    \"\"\"\n",
"    per_image_tables = []\n",
"    for _, file_row in in_file_df.iterrows():\n",
"        per_image_tables.append(image_to_submission(file_row[image_key], file_row['filename']))\n",
"    return pd.concat(per_image_tables)\n",
"# build the combined table, then write it without the index column as the file to upload\n",
"submission_df = all_img_to_submission(file_df)\n",
"submission_df.to_csv('simple_submission.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment