{
"cells": [
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"ExecuteTime": {
"end_time": "2019-03-31T12:48:49.888930Z",
"start_time": "2019-03-31T12:41:45.039640Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">> DONE \n"
]
}
],
"source": [
"# coding=UTF-8\n",
"import os\n",
"import sys\n",
"import cv2\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"classes={\n",
" \"鞭虫卵\":\"Whipworm\",\n",
" \"带绦虫卵\":\"TaeniaSolium\",\n",
" \"肺吸虫卵\":\"Paragonimusi\",\n",
" \"钩虫卵\":\"Hookworm\",\n",
" \"姜片虫卵\":\"Fasciolopsis\",\n",
" \"蛲虫卵\":\"Pinworm\",\n",
" \"受精蛔虫卵\":\"FertilizedAscaris\",\n",
" \"未受精蛔虫卵\":\"Ascaris\",\n",
" \"血吸虫卵\":\"Schistosoma\"\n",
"}\n",
"\n",
"def convert_source(src,out_dir):\n",
" \"\"\"\n",
" 转换输出图像\n",
" \"\"\"\n",
" for class_name in classes.keys(): #读取映射\n",
" images_list = os.listdir(os.path.join(src,class_name)) #遍历文件\n",
" if not os.path.exists('{}/{}'.format(out_dir,classes[class_name])):\n",
" os.makedirs('{}/{}'.format(out_dir,classes[class_name]))\n",
" for idx,image_name in enumerate(images_list):\n",
" if image_name.endswith('m.jpg')or image_name.endswith('中.jpg'):\n",
" sys.stdout.write(\"\\r>>process:{}/{} \".format(class_name,image_name))\n",
" sys.stdout.flush()\n",
" cut('{}/{}/{}'.format(src,class_name,image_name),\n",
" '{}/{}/{}_{}'.format(out_dir,\n",
" classes[class_name],\n",
" classes[class_name],\n",
" idx),0)\n",
" cut('{}/{}/{}'.format(src,class_name,image_name),\n",
" '{}/{}/{}_{}'.format(out_dir,\n",
" classes[class_name],\n",
" classes[class_name],\n",
" idx),4)\n",
" cut('{}/{}/{}'.format(src,class_name,image_name),\n",
" '{}/{}/{}_{}'.format(out_dir,\n",
" classes[class_name],\n",
" classes[class_name],\n",
" idx),9)\n",
" sys.stdout.write(\"\\r>> DONE \")\n",
" sys.stdout.write(\"\\n\")\n",
" sys.stdout.flush()\n",
"\n",
"def to_rgb(img):\n",
" return cv2.cvtColor(img,cv2.COLOR_BGR2RGB)\n",
"\n",
"\n",
"def resize(im):\n",
" return cv2.resize(im,(299,299))\n",
"\n",
"def save4(im,q,path,idx=0):\n",
" cv2.imwrite(\"{}_4_{}_0.jpg\".format(path,idx),resize(im[0:q,0:q]))\n",
" cv2.imwrite(\"{}_4_{}_1.jpg\".format(path,idx),resize(im[0:q,q:]))\n",
" cv2.imwrite(\"{}_4_{}_2.jpg\".format(path,idx),resize(im[q:,0:q]))\n",
" cv2.imwrite(\"{}_4_{}_3.jpg\".format(path,idx),resize(im[q:,q:]))\n",
" \n",
"def save(im,path,idx=0):\n",
" cv2.imwrite(\"{}_0_{}.jpg\".format(path,idx),resize(im))\n",
"\n",
"def save9(im,q,path,idx=0):\n",
" cv2.imwrite(\"{}_9_{}_0.jpg\".format(path,idx),resize(im[0:q,0:q]))\n",
" cv2.imwrite(\"{}_9_{}_1.jpg\".format(path,idx),resize(im[0:q,q:q*2]))\n",
" cv2.imwrite(\"{}_9_{}_2.jpg\".format(path,idx),resize(im[0:q,q*2:]))\n",
" cv2.imwrite(\"{}_9_{}_3.jpg\".format(path,idx),resize(im[q:q*2,0:q]))\n",
" cv2.imwrite(\"{}_9_{}_4.jpg\".format(path,idx),resize(im[q:q*2,q:q*2]))\n",
" cv2.imwrite(\"{}_9_{}_5.jpg\".format(path,idx),resize(im[q:q*2,q*2:]))\n",
" cv2.imwrite(\"{}_9_{}_6.jpg\".format(path,idx),resize(im[q*2:,0:q]))\n",
" cv2.imwrite(\"{}_9_{}_7.jpg\".format(path,idx),resize(im[q*2:,q:q*2]))\n",
" cv2.imwrite(\"{}_9_{}_8.jpg\".format(path,idx),resize(im[q*2:,q*2:]))\n",
"\n",
" \n",
"def cut(path,out,mode=9):\n",
"# im=cv2.imread(path)\n",
" im=cv2.imdecode(np.fromfile(path,dtype=np.uint8),cv2.IMREAD_COLOR)\n",
" (h,w,_)=im.shape\n",
" if mode==0:\n",
" save(im[0:h,0:h],out,0)\n",
" save(im[0:,w-h:],out,1)\n",
" elif mode==4:\n",
" save4(im[0:h,0:h],int(im.shape[0]/2),out,0)\n",
" save4(im[0:h,w-h:],int(im.shape[0]/2),out,1)\n",
" else:\n",
" save9(im[0:h,0:h],int(im.shape[0]/3),out,0)\n",
" save9(im[0:h,w-h:],int(im.shape[0]/3),out,1)\n",
"\n",
" \n",
"convert_source('source','datas')\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"ExecuteTime": {
"end_time": "2019-03-31T12:54:37.336510Z",
"start_time": "2019-03-31T12:54:32.922719Z"
},
"code_folding": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">> Converting train image 904/904 shard 2\n",
">> Converting validation image 350/350 shard 2\n"
]
}
],
"source": [
"# coding=UTF-8\n",
"\n",
"import os\n",
"import random\n",
"import sys\n",
"import dataset_utils\n",
"import math\n",
"import os\n",
"import tensorflow as tf\n",
"# generate data list\n",
"\n",
"def generate_list(classes, data_dir, list_path, train_list_path, val_list_path):\n",
" \"\"\"\n",
" 生成待处理数据清单\n",
" \"\"\"\n",
" fd = open(list_path, 'w')\n",
" for class_name in classes.keys():\n",
" images_list = os.listdir(os.path.join(data_dir , class_name))\n",
" for image_name in images_list:\n",
" fd.write('{}/{} {}\\n'.format(class_name, image_name, classes[class_name]))\n",
" fd.close()\n",
" _NUM_VALIDATION = 350\n",
" _RANDOM_SEED = 0\n",
" fd = open(list_path)\n",
" lines = fd.readlines()\n",
" fd.close()\n",
" random.seed(_RANDOM_SEED)\n",
" random.shuffle(lines)\n",
" fd = open(train_list_path, 'w')\n",
" for line in lines[_NUM_VALIDATION:]:\n",
" fd.write(line)\n",
" fd.close()\n",
" fd = open(val_list_path, 'w')\n",
" for line in lines[:_NUM_VALIDATION]:\n",
" fd.write(line)\n",
" fd.close()\n",
" \n",
" \n",
"def convert_dataset(name,list_path, data_dir, output_dir, is_train=True, _NUM_SHARDS=3):\n",
" \"\"\"\n",
" 生成TF数据集\n",
" \"\"\"\n",
" fd = open(list_path)\n",
" lines = [line.split() for line in fd]\n",
" fd.close()\n",
" prefix ='train'\n",
" if not is_train:\n",
" prefix = 'validation'\n",
" num_per_shard = int(math.ceil(len(lines) / float(_NUM_SHARDS)))\n",
" with tf.Graph().as_default():\n",
" decode_jpeg_data = tf.placeholder(dtype=tf.string)\n",
" decode_jpeg = tf.image.decode_jpeg(decode_jpeg_data, channels=3)\n",
" with tf.Session('') as sess:\n",
" for shard_id in range(_NUM_SHARDS):\n",
" output_path = os.path.join(output_dir,\n",
" '{}_{}_{:03}-of-{:03}.tfrecord'.format(name,prefix,shard_id, _NUM_SHARDS))\n",
" tfrecord_writer = tf.python_io.TFRecordWriter(output_path)\n",
" start_ndx = shard_id * num_per_shard\n",
" end_ndx = min((shard_id + 1) * num_per_shard, len(lines))\n",
" for i in range(start_ndx, end_ndx):\n",
" sys.stdout.write('\\r>> Converting {} image {}/{} shard {}'.format(\n",
" prefix,i + 1, len(lines), shard_id))\n",
" sys.stdout.flush()\n",
" image_data = tf.gfile.GFile(os.path.join(data_dir, lines[i][0]), 'rb').read()\n",
" image = sess.run(decode_jpeg, feed_dict={decode_jpeg_data: image_data})\n",
" height, width = image.shape[0], image.shape[1]\n",
" example = dataset_utils.image_to_tfexample(\n",
" image_data, b'jpg', height, width, int(lines[i][1]))\n",
" tfrecord_writer.write(example.SerializeToString())\n",
" tfrecord_writer.close()\n",
" sys.stdout.write('\\n')\n",
" sys.stdout.flush()\n",
" \n",
" \n",
"if __name__ == '__main__':\n",
" name='eggs'\n",
" classes={\n",
" \"Whipworm\":0,\n",
" \"TaeniaSolium\":1,\n",
" \"Paragonimusi\":2,\n",
" \"Hookworm\":3,\n",
" \"Fasciolopsis\":4,\n",
" \"Pinworm\":5,\n",
" \"FertilizedAscaris\":6,\n",
" \"Ascaris\":7,\n",
" \"Schistosoma\":8\n",
" }\n",
" generate_list(\n",
" classes=classes,\n",
" data_dir='datas',\n",
" list_path='list.txt',\n",
" train_list_path='list_train.txt',\n",
" val_list_path='list_val.txt'\n",
" )\n",
" convert_dataset(name,'list_train.txt', 'datas', 'data_set')\n",
" convert_dataset(name,'list_val.txt', 'datas', 'data_set', False)"
]
},
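{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch (not part of the original pipeline) of how the generated TFRecords could be read back with the TF 1.x `tf.data` API. The feature keys follow the TF-Slim convention used by `dataset_utils.image_to_tfexample` (`image/encoded`, `image/format`, `image/class/label`, `image/height`, `image/width`); this is an assumption worth checking against the local `dataset_utils`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# coding=UTF-8\n",
"import tensorflow as tf\n",
"\n",
"def parse_example(serialized):\n",
"    # feature keys assumed from the TF-Slim image_to_tfexample convention\n",
"    features = tf.parse_single_example(serialized, {\n",
"        'image/encoded': tf.FixedLenFeature([], tf.string),\n",
"        'image/class/label': tf.FixedLenFeature([], tf.int64),\n",
"    })\n",
"    image = tf.image.decode_jpeg(features['image/encoded'], channels=3)\n",
"    return image, features['image/class/label']\n",
"\n",
"dataset = tf.data.TFRecordDataset(tf.gfile.Glob('data_set/eggs_train_*.tfrecord'))\n",
"dataset = dataset.map(parse_example).shuffle(1000).batch(32)\n"
]
},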
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:root] *",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}