Created
April 1, 2019 13:55
-
-
Save ZenLiuCN/eefcf2b8e3923197160dc77d789c79fc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-03-31T12:48:49.888930Z", | |
"start_time": "2019-03-31T12:41:45.039640Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
">> DONE \n" | |
] | |
} | |
], | |
"source": [ | |
"# coding=UTF-8\n", | |
"import os\n", | |
"import sys\n", | |
"import cv2\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"classes={\n", | |
" \"鞭虫卵\":\"Whipworm\",\n", | |
" \"带绦虫卵\":\"TaeniaSolium\",\n", | |
" \"肺吸虫卵\":\"Paragonimusi\",\n", | |
" \"钩虫卵\":\"Hookworm\",\n", | |
" \"姜片虫卵\":\"Fasciolopsis\",\n", | |
" \"蛲虫卵\":\"Pinworm\",\n", | |
" \"受精蛔虫卵\":\"FertilizedAscaris\",\n", | |
" \"未受精蛔虫卵\":\"Ascaris\",\n", | |
" \"血吸虫卵\":\"Schistosoma\"\n", | |
"}\n", | |
"\n", | |
"def convert_source(src,out_dir):\n", | |
" \"\"\"\n", | |
" 转换输出图像\n", | |
" \"\"\"\n", | |
" for class_name in classes.keys(): #读取映射\n", | |
" images_list = os.listdir(os.path.join(src,class_name)) #遍历文件\n", | |
" if not os.path.exists('{}/{}'.format(out_dir,classes[class_name])):\n", | |
" os.makedirs('{}/{}'.format(out_dir,classes[class_name]))\n", | |
" for idx,image_name in enumerate(images_list):\n", | |
" if image_name.endswith('m.jpg')or image_name.endswith('中.jpg'):\n", | |
" sys.stdout.write(\"\\r>>process:{}/{} \".format(class_name,image_name))\n", | |
" sys.stdout.flush()\n", | |
" cut('{}/{}/{}'.format(src,class_name,image_name),\n", | |
" '{}/{}/{}_{}'.format(out_dir,\n", | |
" classes[class_name],\n", | |
" classes[class_name],\n", | |
" idx),0)\n", | |
" cut('{}/{}/{}'.format(src,class_name,image_name),\n", | |
" '{}/{}/{}_{}'.format(out_dir,\n", | |
" classes[class_name],\n", | |
" classes[class_name],\n", | |
" idx),4)\n", | |
" cut('{}/{}/{}'.format(src,class_name,image_name),\n", | |
" '{}/{}/{}_{}'.format(out_dir,\n", | |
" classes[class_name],\n", | |
" classes[class_name],\n", | |
" idx),9)\n", | |
" sys.stdout.write(\"\\r>> DONE \")\n", | |
" sys.stdout.write(\"\\n\")\n", | |
" sys.stdout.flush()\n", | |
"\n", | |
"def to_rgb(img):\n", | |
" return cv2.cvtColor(img,cv2.COLOR_BGR2RGB)\n", | |
"\n", | |
"\n", | |
"def resize(im):\n", | |
" return cv2.resize(im,(299,299))\n", | |
"\n", | |
"def save4(im,q,path,idx=0):\n", | |
" cv2.imwrite(\"{}_4_{}_0.jpg\".format(path,idx),resize(im[0:q,0:q]))\n", | |
" cv2.imwrite(\"{}_4_{}_1.jpg\".format(path,idx),resize(im[0:q,q:]))\n", | |
" cv2.imwrite(\"{}_4_{}_2.jpg\".format(path,idx),resize(im[q:,0:q]))\n", | |
" cv2.imwrite(\"{}_4_{}_3.jpg\".format(path,idx),resize(im[q:,q:]))\n", | |
" \n", | |
"def save(im,path,idx=0):\n", | |
" cv2.imwrite(\"{}_0_{}.jpg\".format(path,idx),resize(im))\n", | |
"\n", | |
"def save9(im,q,path,idx=0):\n", | |
" cv2.imwrite(\"{}_9_{}_0.jpg\".format(path,idx),resize(im[0:q,0:q]))\n", | |
" cv2.imwrite(\"{}_9_{}_1.jpg\".format(path,idx),resize(im[0:q,q:q*2]))\n", | |
" cv2.imwrite(\"{}_9_{}_2.jpg\".format(path,idx),resize(im[0:q,q*2:]))\n", | |
" cv2.imwrite(\"{}_9_{}_3.jpg\".format(path,idx),resize(im[q:q*2,0:q]))\n", | |
" cv2.imwrite(\"{}_9_{}_4.jpg\".format(path,idx),resize(im[q:q*2,q:q*2]))\n", | |
" cv2.imwrite(\"{}_9_{}_5.jpg\".format(path,idx),resize(im[q:q*2,q*2:]))\n", | |
" cv2.imwrite(\"{}_9_{}_6.jpg\".format(path,idx),resize(im[q*2:,0:q]))\n", | |
" cv2.imwrite(\"{}_9_{}_7.jpg\".format(path,idx),resize(im[q*2:,q:q*2]))\n", | |
" cv2.imwrite(\"{}_9_{}_8.jpg\".format(path,idx),resize(im[q*2:,q*2:]))\n", | |
"\n", | |
" \n", | |
"def cut(path,out,mode=9):\n", | |
"# im=cv2.imread(path)\n", | |
" im=cv2.imdecode(np.fromfile(path,dtype=np.uint8),cv2.IMREAD_COLOR)\n", | |
" (h,w,_)=im.shape\n", | |
" if mode==0:\n", | |
" save(im[0:h,0:h],out,0)\n", | |
" save(im[0:,w-h:],out,1)\n", | |
" elif mode==4:\n", | |
" save4(im[0:h,0:h],int(im.shape[0]/2),out,0)\n", | |
" save4(im[0:h,w-h:],int(im.shape[0]/2),out,1)\n", | |
" else:\n", | |
" save9(im[0:h,0:h],int(im.shape[0]/3),out,0)\n", | |
" save9(im[0:h,w-h:],int(im.shape[0]/3),out,1)\n", | |
"\n", | |
" \n", | |
"convert_source('source','datas')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2019-03-31T12:54:37.336510Z", | |
"start_time": "2019-03-31T12:54:32.922719Z" | |
}, | |
"code_folding": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
">> Converting train image 904/904 shard 2\n", | |
">> Converting validation image 350/350 shard 2\n" | |
] | |
} | |
], | |
"source": [ | |
"# coding=UTF-8\n", | |
"\n", | |
"import os\n", | |
"import random\n", | |
"import sys\n", | |
"import dataset_utils\n", | |
"import math\n", | |
"import os\n", | |
"import tensorflow as tf\n", | |
"# generate data list\n", | |
"\n", | |
"def generate_list(classes, data_dir, list_path, train_list_path, val_list_path):\n", | |
" \"\"\"\n", | |
" 生成待处理数据清单\n", | |
" \"\"\"\n", | |
" fd = open(list_path, 'w')\n", | |
" for class_name in classes.keys():\n", | |
" images_list = os.listdir(os.path.join(data_dir , class_name))\n", | |
" for image_name in images_list:\n", | |
" fd.write('{}/{} {}\\n'.format(class_name, image_name, classes[class_name]))\n", | |
" fd.close()\n", | |
" _NUM_VALIDATION = 350\n", | |
" _RANDOM_SEED = 0\n", | |
" fd = open(list_path)\n", | |
" lines = fd.readlines()\n", | |
" fd.close()\n", | |
" random.seed(_RANDOM_SEED)\n", | |
" random.shuffle(lines)\n", | |
" fd = open(train_list_path, 'w')\n", | |
" for line in lines[_NUM_VALIDATION:]:\n", | |
" fd.write(line)\n", | |
" fd.close()\n", | |
" fd = open(val_list_path, 'w')\n", | |
" for line in lines[:_NUM_VALIDATION]:\n", | |
" fd.write(line)\n", | |
" fd.close()\n", | |
" \n", | |
" \n", | |
"def convert_dataset(name,list_path, data_dir, output_dir, is_train=True, _NUM_SHARDS=3):\n", | |
" \"\"\"\n", | |
" 生成TF数据集\n", | |
" \"\"\"\n", | |
" fd = open(list_path)\n", | |
" lines = [line.split() for line in fd]\n", | |
" fd.close()\n", | |
" prefix ='train'\n", | |
" if not is_train:\n", | |
" prefix = 'validation'\n", | |
" num_per_shard = int(math.ceil(len(lines) / float(_NUM_SHARDS)))\n", | |
" with tf.Graph().as_default():\n", | |
" decode_jpeg_data = tf.placeholder(dtype=tf.string)\n", | |
" decode_jpeg = tf.image.decode_jpeg(decode_jpeg_data, channels=3)\n", | |
" with tf.Session('') as sess:\n", | |
" for shard_id in range(_NUM_SHARDS):\n", | |
" output_path = os.path.join(output_dir,\n", | |
" '{}_{}_{:03}-of-{:03}.tfrecord'.format(name,prefix,shard_id, _NUM_SHARDS))\n", | |
" tfrecord_writer = tf.python_io.TFRecordWriter(output_path)\n", | |
" start_ndx = shard_id * num_per_shard\n", | |
" end_ndx = min((shard_id + 1) * num_per_shard, len(lines))\n", | |
" for i in range(start_ndx, end_ndx):\n", | |
" sys.stdout.write('\\r>> Converting {} image {}/{} shard {}'.format(\n", | |
" prefix,i + 1, len(lines), shard_id))\n", | |
" sys.stdout.flush()\n", | |
" image_data = tf.gfile.GFile(os.path.join(data_dir, lines[i][0]), 'rb').read()\n", | |
" image = sess.run(decode_jpeg, feed_dict={decode_jpeg_data: image_data})\n", | |
" height, width = image.shape[0], image.shape[1]\n", | |
" example = dataset_utils.image_to_tfexample(\n", | |
" image_data, b'jpg', height, width, int(lines[i][1]))\n", | |
" tfrecord_writer.write(example.SerializeToString())\n", | |
" tfrecord_writer.close()\n", | |
" sys.stdout.write('\\n')\n", | |
" sys.stdout.flush()\n", | |
" \n", | |
" \n", | |
"if __name__ == '__main__':\n", | |
" name='eggs'\n", | |
" classes={\n", | |
" \"Whipworm\":0,\n", | |
" \"TaeniaSolium\":1,\n", | |
" \"Paragonimusi\":2,\n", | |
" \"Hookworm\":3,\n", | |
" \"Fasciolopsis\":4,\n", | |
" \"Pinworm\":5,\n", | |
" \"FertilizedAscaris\":6,\n", | |
" \"Ascaris\":7,\n", | |
" \"Schistosoma\":8\n", | |
" }\n", | |
" generate_list(\n", | |
" classes=classes,\n", | |
" data_dir='datas',\n", | |
" list_path='list.txt',\n", | |
" train_list_path='list_train.txt',\n", | |
" val_list_path='list_val.txt'\n", | |
" )\n", | |
" convert_dataset(name,'list_train.txt', 'datas', 'data_set')\n", | |
" convert_dataset(name,'list_val.txt', 'datas', 'data_set', False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:root] *", | |
"language": "python", | |
"name": "conda-root-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment