Pked01/annotation_utility.ipynb

## annotation_utility.ipynb
{
  "cells": [
    {
      "metadata": {
        "ExecuteTime": {
          "end_time": "2019-01-08T11:28:24.137963Z",
          "start_time": "2019-01-08T11:28:21.539253Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "end_time": "2019-01-08T11:28:24.149288Z",
          "start_time": "2019-01-08T11:28:24.143768Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "import os,json\nimport pickle",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {
        "ExecuteTime": {
          "end_time": "2019-01-08T11:28:24.297291Z",
          "start_time": "2019-01-08T11:28:24.152644Z"
        },
        "trusted": true
      },
      "cell_type": "code",
      "source": "def get_all_file_path(source_dir,ext='.csv'):\n    \"\"\"\n    Recursively collect the absolute paths of all files below source_dir\n    whose name ends with ext (default '.csv').\n\n    Returns a list of path strings in os.walk traversal order.\n    \"\"\"\n    return [\n        os.path.join(os.path.abspath(folder), name)\n        for folder, _subdirs, names in os.walk(source_dir)\n        for name in names\n        if name.endswith(ext)\n    ]",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Convert videos to image frames"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import cv2  # used below but never imported elsewhere in this notebook\n\nall_files = get_all_file_path('/home/prateek/Desktop/Video_Data/Video/4.Hot_mill_view/',ext=\".mp4\")\n\ndump_path = 'train_frame/'\n# Make sure the output folder exists before any frame is written.\nos.makedirs(dump_path, exist_ok=True)\ni = 0           # running counter over all videos, used as the image file name\nskip_frame = 5  # number of frames dropped before each frame that is kept\nfor vid_file in all_files:\n    cap = cv2.VideoCapture(vid_file)\n    try:\n        while True:\n            # Read skip_frame + 1 frames and keep only the last one.\n            for _ in range(skip_frame + 1):\n                ret, frame = cap.read()\n            if not ret:\n                # Last read failed: stop with this video and go to the next.\n                break\n            i += 1\n            cv2.imwrite(dump_path+str(i).zfill(6)+'.jpg',frame)\n    finally:\n        # Release the capture handle even if reading/writing fails.\n        cap.release()",
      "execution_count": 10,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Conversion to ChainerCV format: converts the CSV files from gtmaker to JSON (VOC format)"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "folder_path = 'rect/'\n\n# Absolute paths of every '.csv' file below folder_path (recursive).\nfiles_paths = get_all_file_path(folder_path)\n\n# Derive the names from files_paths so that file_names[k] always belongs to\n# files_paths[k]: os.listdir and os.walk share no ordering guarantee, and\n# os.listdir does not descend into sub-folders while get_all_file_path does.\nfile_names = [os.path.basename(csv_path) for csv_path in files_paths]",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Numeric class id -> class name.\nname_dict = dict(enumerate(('person', 'helmet', 'no_helmet', 'vest', 'no_vest')))",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "def _to_native(value):\n    \"\"\"Return numpy scalars as plain Python numbers so json.dump accepts them.\"\"\"\n    return value.item() if hasattr(value, 'item') else value\n\n\nall_annos = []\nfor csv_path in files_paths:\n    # Take the name from the path itself so it always matches the file being read.\n    csv_name = os.path.basename(csv_path)\n    data = pd.read_csv(csv_path,index_col='index')\n    labels = []\n    for row_no in range(data.shape[0]):\n        try:\n            row = data.iloc[row_no]\n            labels.append({\n                'label_class': name_dict[int(row['label'])],\n                'label_type': \"box\",\n                'object_id': row_no,\n                'size': {\n                    'x': _to_native(row['width']),\n                    'y': _to_native(row['height']),\n                },\n                # x/y plus half the size gives the box centre\n                # (assumes x, y are the top-left corner - TODO confirm).\n                'centre': {\n                    'x': int(row['x'] + .5*row['width']),\n                    'y': int(row['y'] + .5*row['height']),\n                },\n            })\n        except Exception as e:\n            # Best effort: report the bad row and keep the remaining ones.\n            print('{} row {}: {!r}'.format(csv_name, row_no, e))\n    data_dict = {\n        'complete': None,\n        # Drop only the trailing '.csv' (e.g. 'a.jpg.csv' -> 'a.jpg').\n        'filename': csv_name[:-len('.csv')] if csv_name.endswith('.csv') else csv_name,\n        'labels': labels,\n    }\n    out_path = csv_path.replace('.jpg.csv','__labels.json')\n    if out_path == csv_path:\n        # No '.jpg.csv' in the name: never open the source CSV itself for writing.\n        out_path = os.path.splitext(csv_path)[0] + '__labels.json'\n    with open(out_path, 'w') as outfile:\n        json.dump(data_dict, outfile,sort_keys=True, indent=4)\n    all_annos.append(data_dict)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "#### Multiple JSON label files to one combined retinanet format (csv)\n#### filename, x1, y1, x2, y2, label"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "#convert json file to csv retinanet format\npath2srh = '../train_frame/'\n# get_all_file_path already returns absolute paths, so they are used as-is;\n# prefixing them with path2srh again yields paths that do not exist.\nall_json = get_all_file_path(path2srh,'.json')\nop = []  # one [file_name, x1, y1, x2, y2, label_name] row per labelled box\nfor json_path in all_json:\n    # Context manager closes the file handle after parsing.\n    with open(json_path) as json_file:\n        json_data = json.load(json_file)\n    # NOTE(review): this is the path of the JSON label file, as in the original\n    # code; a retinanet CSV presumably wants the image path - confirm.\n    file_name = os.path.abspath(json_path)\n    for label in json_data['labels']:\n        centre_x, centre_y = label['centre']['x'], label['centre']['y']\n        half_w, half_h = label['size']['x']/2, label['size']['y']/2\n        # centre -/+ half the size gives the two corners, truncated to int\n        x1 = int(centre_x-half_w)\n        y1 = int(centre_y-half_h)\n        x2 = int(centre_x+half_w)\n        y2 = int(centre_y+half_h)\n        op.append([file_name,x1,y1,x2,y2,label['label_class']])",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Tabular view of the collected rows; no column names are given, so pandas\n# assigns its default integer column labels.\ndf = pd.DataFrame(data=op)",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "cv_p35",
      "display_name": "cv_p35",
      "language": "python"
    },
    "language_info": {
      "pygments_lexer": "ipython3",
      "file_extension": ".py",
      "version": "3.5.2",
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      },
      "name": "python",
      "nbconvert_exporter": "python",
      "mimetype": "text/x-python"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "abg/helmet_n_vest/training_data_from_vids/annotation_utility.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-08T11:28:24.137963Z",
	"start_time": "2019-01-08T11:28:21.539253Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "import pandas as pd\nimport numpy as np",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-08T11:28:24.149288Z",
	"start_time": "2019-01-08T11:28:24.143768Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "import os,json\nimport pickle",
	"execution_count": 2,
	"outputs": []
	},
	{
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-08T11:28:24.297291Z",
	"start_time": "2019-01-08T11:28:24.152644Z"
	},
	"trusted": true
	},
	"cell_type": "code",
	"source": "def get_all_file_path(source_dir,ext='.csv'):\n    \"\"\"\n    Recursively collect the absolute paths of all files below source_dir\n    whose name ends with ext (default '.csv').\n\n    Returns a list of path strings in os.walk traversal order.\n    \"\"\"\n    return [\n        os.path.join(os.path.abspath(folder), name)\n        for folder, _subdirs, names in os.walk(source_dir)\n        for name in names\n        if name.endswith(ext)\n    ]",
	"execution_count": 3,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Convert videos to image frames"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import cv2  # used below but never imported elsewhere in this notebook\n\nall_files = get_all_file_path('/home/prateek/Desktop/Video_Data/Video/4.Hot_mill_view/',ext=\".mp4\")\n\ndump_path = 'train_frame/'\n# Make sure the output folder exists before any frame is written.\nos.makedirs(dump_path, exist_ok=True)\ni = 0           # running counter over all videos, used as the image file name\nskip_frame = 5  # number of frames dropped before each frame that is kept\nfor vid_file in all_files:\n    cap = cv2.VideoCapture(vid_file)\n    try:\n        while True:\n            # Read skip_frame + 1 frames and keep only the last one.\n            for _ in range(skip_frame + 1):\n                ret, frame = cap.read()\n            if not ret:\n                # Last read failed: stop with this video and go to the next.\n                break\n            i += 1\n            cv2.imwrite(dump_path+str(i).zfill(6)+'.jpg',frame)\n    finally:\n        # Release the capture handle even if reading/writing fails.\n        cap.release()",
	"execution_count": 10,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Conversion to ChainerCV format: converts the CSV files from gtmaker to JSON (VOC format)"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "folder_path = 'rect/'\n\n# Absolute paths of every '.csv' file below folder_path (recursive).\nfiles_paths = get_all_file_path(folder_path)\n\n# Derive the names from files_paths so that file_names[k] always belongs to\n# files_paths[k]: os.listdir and os.walk share no ordering guarantee, and\n# os.listdir does not descend into sub-folders while get_all_file_path does.\nfile_names = [os.path.basename(csv_path) for csv_path in files_paths]",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Numeric class id -> class name.\nname_dict = dict(enumerate(('person', 'helmet', 'no_helmet', 'vest', 'no_vest')))",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "def _to_native(value):\n    \"\"\"Return numpy scalars as plain Python numbers so json.dump accepts them.\"\"\"\n    return value.item() if hasattr(value, 'item') else value\n\n\nall_annos = []\nfor csv_path in files_paths:\n    # Take the name from the path itself so it always matches the file being read.\n    csv_name = os.path.basename(csv_path)\n    data = pd.read_csv(csv_path,index_col='index')\n    labels = []\n    for row_no in range(data.shape[0]):\n        try:\n            row = data.iloc[row_no]\n            labels.append({\n                'label_class': name_dict[int(row['label'])],\n                'label_type': \"box\",\n                'object_id': row_no,\n                'size': {\n                    'x': _to_native(row['width']),\n                    'y': _to_native(row['height']),\n                },\n                # x/y plus half the size gives the box centre\n                # (assumes x, y are the top-left corner - TODO confirm).\n                'centre': {\n                    'x': int(row['x'] + .5*row['width']),\n                    'y': int(row['y'] + .5*row['height']),\n                },\n            })\n        except Exception as e:\n            # Best effort: report the bad row and keep the remaining ones.\n            print('{} row {}: {!r}'.format(csv_name, row_no, e))\n    data_dict = {\n        'complete': None,\n        # Drop only the trailing '.csv' (e.g. 'a.jpg.csv' -> 'a.jpg').\n        'filename': csv_name[:-len('.csv')] if csv_name.endswith('.csv') else csv_name,\n        'labels': labels,\n    }\n    out_path = csv_path.replace('.jpg.csv','__labels.json')\n    if out_path == csv_path:\n        # No '.jpg.csv' in the name: never open the source CSV itself for writing.\n        out_path = os.path.splitext(csv_path)[0] + '__labels.json'\n    with open(out_path, 'w') as outfile:\n        json.dump(data_dict, outfile,sort_keys=True, indent=4)\n    all_annos.append(data_dict)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "#### Multiple JSON label files to one combined retinanet format (csv)\n#### filename, x1, y1, x2, y2, label"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "#convert json file to csv retinanet format\npath2srh = '../train_frame/'\n# get_all_file_path already returns absolute paths, so they are used as-is;\n# prefixing them with path2srh again yields paths that do not exist.\nall_json = get_all_file_path(path2srh,'.json')\nop = []  # one [file_name, x1, y1, x2, y2, label_name] row per labelled box\nfor json_path in all_json:\n    # Context manager closes the file handle after parsing.\n    with open(json_path) as json_file:\n        json_data = json.load(json_file)\n    # NOTE(review): this is the path of the JSON label file, as in the original\n    # code; a retinanet CSV presumably wants the image path - confirm.\n    file_name = os.path.abspath(json_path)\n    for label in json_data['labels']:\n        centre_x, centre_y = label['centre']['x'], label['centre']['y']\n        half_w, half_h = label['size']['x']/2, label['size']['y']/2\n        # centre -/+ half the size gives the two corners, truncated to int\n        x1 = int(centre_x-half_w)\n        y1 = int(centre_y-half_h)\n        x2 = int(centre_x+half_w)\n        y2 = int(centre_y+half_h)\n        op.append([file_name,x1,y1,x2,y2,label['label_class']])",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Tabular view of the collected rows; no column names are given, so pandas\n# assigns its default integer column labels.\ndf = pd.DataFrame(data=op)",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "cv_p35",
	"display_name": "cv_p35",
	"language": "python"
	},
	"language_info": {
	"pygments_lexer": "ipython3",
	"file_extension": ".py",
	"version": "3.5.2",
	"codemirror_mode": {
	"version": 3,
	"name": "ipython"
	},
	"name": "python",
	"nbconvert_exporter": "python",
	"mimetype": "text/x-python"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "abg/helmet_n_vest/training_data_from_vids/annotation_utility.ipynb",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}