Pked01/File_processing_image_annotations.ipynb

## File_processing_image_annotations.ipynb
{
  "cells": [
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T06:02:43.623325Z",
          "end_time": "2018-09-20T06:02:44.335579Z"
        },
        "trusted": true,
        "scrolled": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np\n\nimport json,os,sys,pickle",
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": "/home/ubuntu/.virtualenvs/cv_p3/lib/python3.5/site-packages/matplotlib/__init__.py:1066: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #2\n  (fname, cnt))\n/home/ubuntu/.virtualenvs/cv_p3/lib/python3.5/site-packages/matplotlib/__init__.py:1066: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #3\n  (fname, cnt))\n",
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T08:22:31.759414Z",
          "end_time": "2018-09-20T08:22:31.763254Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "def get_all_file_path(source_dir,ext='.csv'):\n    \"\"\"\n    all images with csv extension exist in set of dirs\n    \"\"\"\n    op =[]\n    for root, dirs, files in os.walk(source_dir):\n        for file in files:\n            if file.endswith(\".csv\"):\n                 op.append(os.path.join(root, file))\n    return op",
      "execution_count": 66,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T08:22:35.738904Z",
          "end_time": "2018-09-20T08:22:35.744643Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "all_csv = get_all_file_path('annotated_files/')\n\n### we need only rect ones\n\nonly_rect_files = [i for i in all_csv if i.__contains__('rect') ]",
      "execution_count": 67,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T07:28:24.041983Z",
          "end_time": "2018-09-20T07:28:24.164843Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "ls annotated_files/construction_labours",
      "execution_count": 59,
      "outputs": [
        {
          "output_type": "stream",
          "text": "\u001b[0m\u001b[01;34mcont\u001b[0m/  labels.csv  \u001b[01;34mrect\u001b[0m/\r\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-19T10:16:52.396151Z",
          "end_time": "2018-09-19T10:16:52.400016Z"
        },
        "trusted": false
      },
      "cell_type": "code",
      "source": "all_annotations= {}",
      "execution_count": 52,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T08:25:03.375800Z",
          "end_time": "2018-09-20T08:25:03.661922Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "### label for construction-engineer/20180918_123855\nfolder_name = 'construction_labours'\nlabel = pd.read_csv('annotated_files/'+folder_name+'/labels.csv',index_col='index',usecols=[0,1])\nlabel_dict = label.to_dict()['name']\n\nfiles_of_interest = [i for i in only_rect_files if i.__contains__(folder_name)]    \n\nfor f in files_of_interest:\n    try:\n        df = pd.read_csv(f,usecols=list(range(6)),index_col='index')\n        df.label = [label_dict[i] for i in df.label]\n        all_annotations[f] = df\n    except Exception as e:\n        print(e)",
      "execution_count": 76,
      "outputs": [
        {
          "output_type": "stream",
          "text": "-1\n-1\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T08:25:41.237429Z",
          "end_time": "2018-09-20T08:25:41.271079Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "#pickle.dump(all_annotations,open('all_annotations.pickle','wb'))\n\nall_annotations = pickle.load(open('all_annotations.pickle','rb'))",
      "execution_count": 77,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "start_time": "2018-09-20T08:29:27.265658Z",
          "end_time": "2018-09-20T08:29:27.737359Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "for k,v in all_annotations.items():\n    dirs = k.split('/')\n\n    op = {}\n    op['complete'] = None\n    filename = dirs[-1].replace('.csv','')\n    op['filename'] = filename\n    labels = []\n    op['labels'] = labels\n    for i in v.iterrows():\n\n        labels_i = {}\n        center = {}\n        size = {}\n        center['x'] = (i[1]['x']+i[1]['width'])/2\n        center['y'] = (i[1]['y']+i[1]['height'])/2\n        labels_i[\"object_id\"] = i[0]+1\n        labels_i['centre'] = center\n        size['x'] = i[1]['width']\n        size['y'] = i[1]['height']\n        labels_i['size'] = size\n        labels_i['label_type'] = 'box'\n        labels_i['label_class'] = i[1]['label']\n        labels.append(labels_i)\n    with open(dirs[1]+'/'+os.path.splitext(os.path.basename(filename))[0]+'.json', 'w') as fp:\n        json.dump(op, fp, sort_keys=True, indent=4)",
      "execution_count": 80,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "cv_p3",
      "display_name": "cv_p3",
      "language": "python"
    },
    "hide_input": false,
    "language_info": {
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python",
      "file_extension": ".py",
      "version": "3.5.2",
      "pygments_lexer": "ipython3",
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      }
    },
    "latex_envs": {
      "eqNumInitial": 1,
      "eqLabelWithNumbers": true,
      "current_citInitial": 1,
      "cite_by": "apalike",
      "bibliofile": "biblio.bib",
      "LaTeX_envs_menu_present": true,
      "labels_anchors": false,
      "latex_user_defs": false,
      "user_envs_cfg": false,
      "report_style_numbering": false,
      "autoclose": false,
      "autocomplete": true,
      "hotkeys": {
        "equation": "Ctrl-E",
        "itemize": "Ctrl-I"
      }
    },
    "gist": {
      "id": "",
      "data": {
        "description": "ABG/vision_related/helmet detection/downloads/File_processing.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T06:02:43.623325Z",
	"end_time": "2018-09-20T06:02:44.335579Z"
	},
	"trusted": true,
	"scrolled": true
	},
	"cell_type": "code",
	"source": "import pandas as pd\nimport numpy as np\n\nimport json,os,sys,pickle",
	"execution_count": 1,
	"outputs": [
	{
	"output_type": "stream",
	"text": "/home/ubuntu/.virtualenvs/cv_p3/lib/python3.5/site-packages/matplotlib/__init__.py:1066: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #2\n (fname, cnt))\n/home/ubuntu/.virtualenvs/cv_p3/lib/python3.5/site-packages/matplotlib/__init__.py:1066: UserWarning: Duplicate key in file \"/home/ubuntu/.config/matplotlib/matplotlibrc\", line #3\n (fname, cnt))\n",
	"name": "stderr"
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T08:22:31.759414Z",
	"end_time": "2018-09-20T08:22:31.763254Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "def get_all_file_path(source_dir,ext='.csv'):\n \"\"\"\n all images with csv extension exist in set of dirs\n \"\"\"\n op =[]\n for root, dirs, files in os.walk(source_dir):\n for file in files:\n if file.endswith(\".csv\"):\n op.append(os.path.join(root, file))\n return op",
	"execution_count": 66,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T08:22:35.738904Z",
	"end_time": "2018-09-20T08:22:35.744643Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "all_csv = get_all_file_path('annotated_files/')\n\n### we need only rect ones\n\nonly_rect_files = [i for i in all_csv if i.__contains__('rect') ]",
	"execution_count": 67,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T07:28:24.041983Z",
	"end_time": "2018-09-20T07:28:24.164843Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "ls annotated_files/construction_labours",
	"execution_count": 59,
	"outputs": [
	{
	"output_type": "stream",
	"text": "\u001b[0m\u001b[01;34mcont\u001b[0m/ labels.csv \u001b[01;34mrect\u001b[0m/\r\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-19T10:16:52.396151Z",
	"end_time": "2018-09-19T10:16:52.400016Z"
	},
	"trusted": false
	},
	"cell_type": "code",
	"source": "all_annotations= {}",
	"execution_count": 52,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T08:25:03.375800Z",
	"end_time": "2018-09-20T08:25:03.661922Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "### label for construction-engineer/20180918_123855\nfolder_name = 'construction_labours'\nlabel = pd.read_csv('annotated_files/'+folder_name+'/labels.csv',index_col='index',usecols=[0,1])\nlabel_dict = label.to_dict()['name']\n\nfiles_of_interest = [i for i in only_rect_files if i.__contains__(folder_name)] \n\nfor f in files_of_interest:\n try:\n df = pd.read_csv(f,usecols=list(range(6)),index_col='index')\n df.label = [label_dict[i] for i in df.label]\n all_annotations[f] = df\n except Exception as e:\n print(e)",
	"execution_count": 76,
	"outputs": [
	{
	"output_type": "stream",
	"text": "-1\n-1\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T08:25:41.237429Z",
	"end_time": "2018-09-20T08:25:41.271079Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "#pickle.dump(all_annotations,open('all_annotations.pickle','wb'))\n\nall_annotations = pickle.load(open('all_annotations.pickle','rb'))",
	"execution_count": 77,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"start_time": "2018-09-20T08:29:27.265658Z",
	"end_time": "2018-09-20T08:29:27.737359Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "for k,v in all_annotations.items():\n dirs = k.split('/')\n\n op = {}\n op['complete'] = None\n filename = dirs[-1].replace('.csv','')\n op['filename'] = filename\n labels = []\n op['labels'] = labels\n for i in v.iterrows():\n\n labels_i = {}\n center = {}\n size = {}\n center['x'] = (i[1]['x']+i[1]['width'])/2\n center['y'] = (i[1]['y']+i[1]['height'])/2\n labels_i[\"object_id\"] = i[0]+1\n labels_i['centre'] = center\n size['x'] = i[1]['width']\n size['y'] = i[1]['height']\n labels_i['size'] = size\n labels_i['label_type'] = 'box'\n labels_i['label_class'] = i[1]['label']\n labels.append(labels_i)\n with open(dirs[1]+'/'+os.path.splitext(os.path.basename(filename))[0]+'.json', 'w') as fp:\n json.dump(op, fp, sort_keys=True, indent=4)",
	"execution_count": 80,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "cv_p3",
	"display_name": "cv_p3",
	"language": "python"
	},
	"hide_input": false,
	"language_info": {
	"mimetype": "text/x-python",
	"nbconvert_exporter": "python",
	"name": "python",
	"file_extension": ".py",
	"version": "3.5.2",
	"pygments_lexer": "ipython3",
	"codemirror_mode": {
	"version": 3,
	"name": "ipython"
	}
	},
	"latex_envs": {
	"eqNumInitial": 1,
	"eqLabelWithNumbers": true,
	"current_citInitial": 1,
	"cite_by": "apalike",
	"bibliofile": "biblio.bib",
	"LaTeX_envs_menu_present": true,
	"labels_anchors": false,
	"latex_user_defs": false,
	"user_envs_cfg": false,
	"report_style_numbering": false,
	"autoclose": false,
	"autocomplete": true,
	"hotkeys": {
	"equation": "Ctrl-E",
	"itemize": "Ctrl-I"
	}
	},
	"gist": {
	"id": "",
	"data": {
	"description": "ABG/vision_related/helmet detection/downloads/File_processing.ipynb",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}