需要先把用到的数据转换成 TFRecord 格式。这里以 Flower 数据集为例。
这里使用 LeNet5 作为分类网络。
- 原始版
- 修正错误
- 将输入数据在TensorBoard里进行可视化
- 加入自定义网络
- 更多的可视化
| # -*- coding: utf-8 -*- | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import os | |
| import random | |
| from tqdm import tqdm | |
| import numpy as np | |
| import tensorflow as tf | |
| from skimage import io, transform, color, util | |
# Command-line flags of the conversion script (name, default, help text).
flags = tf.app.flags
flags.DEFINE_string('directory', '/home/haoyu/Datasets/flowers/flower_photos', '数据地址')
flags.DEFINE_string('save_dir', '/home/haoyu/Datasets/flowers/tfrecords', '保存地址')
flags.DEFINE_integer('test_size', 350, '测试集大小')
FLAGS = flags.FLAGS

# Estimator modes accepted as TFRecord file stems.
MODES = [
    tf.estimator.ModeKeys.TRAIN,
    tf.estimator.ModeKeys.EVAL,
    tf.estimator.ModeKeys.PREDICT,
]
def _float_feature(value):
    """Wrap a float, or a list of floats, in a ``tf.train.Feature``.

    Args:
        value: a single value or a list of values; a non-list is wrapped
            in a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``float_list`` field holds the values.
    """
    values = value if isinstance(value, list) else [value]
    # A FloatList has to go into the `float_list` field; the original passed
    # it through the `int64_list` keyword, which expects an Int64List.
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))
def _int_feature(value):
    """Wrap an integer, or a list of integers, in a ``tf.train.Feature``.

    A non-list argument is wrapped in a one-element list before being
    stored in the feature's ``int64_list`` field.
    """
    values = value if isinstance(value, list) else [value]
    int64_list = tf.train.Int64List(value=values)
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrap a bytes object, or a list of them, in a ``tf.train.Feature``.

    A non-list argument is wrapped in a one-element list before being
    stored in the feature's ``bytes_list`` field.
    """
    values = value if isinstance(value, list) else [value]
    bytes_list = tf.train.BytesList(value=values)
    return tf.train.Feature(bytes_list=bytes_list)
def convert_to_tfrecord(mode, anno):
    """Write annotated images to ``<FLAGS.save_dir>/<mode>.tfrecords``.

    Every image is read from disk, converted to grayscale, resized to
    28x28 and stored as raw float32 bytes together with its height, width,
    depth and class label.

    Args:
        mode: one of ``MODES``; used as the stem of the output file name.
        anno: iterable of ``(image_path, class_index)`` pairs.

    Raises:
        AssertionError: if ``mode`` is not one of ``MODES``.
    """
    assert mode in MODES, "模式错误"
    # Make sure the target directory exists before the writer opens a file
    # inside it (MakeDirs is a no-op for an existing directory).
    tf.gfile.MakeDirs(FLAGS.save_dir)
    filename = os.path.join(FLAGS.save_dir, mode + '.tfrecords')
    with tf.python_io.TFRecordWriter(filename) as writer:
        for fnm, cls in tqdm(anno):
            # Read and convert the image.
            img = io.imread(fnm)
            # Only images that carry a channel axis need the conversion;
            # a 2-D image is already grayscale.
            if img.ndim == 3:
                img = color.rgb2gray(img)
            img = transform.resize(img, [28, 28])
            # Collect the shape of the converted image.
            if img.ndim == 3:
                rows, cols, depth = img.shape
            else:
                rows, cols = img.shape
                depth = 1
            # Build the Example; pixel data is stored as float32 bytes, so a
            # reader has to decode it with the same dtype.
            example = tf.train.Example(
                features=tf.train.Features(
                    feature={
                        'image/height': _int_feature(rows),
                        'image/width': _int_feature(cols),
                        'image/depth': _int_feature(depth),
                        'image/class/label': _int_feature(cls),
                        'image/encoded': _bytes_feature(img.astype(np.float32).tobytes()),
                    }
                )
            )
            # Serialize and append to the record file.
            writer.write(example.SerializeToString())
def get_folder_name(folder):
    """Return the sorted names of the direct sub-directories of ``folder``.

    The listing is not recursive and plain files are skipped.
    """
    subdirs = [
        name for name in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, name))
    ]
    subdirs.sort()
    return subdirs
def get_file_name(folder):
    """Return the paths of the regular files directly inside ``folder``.

    The listing is not recursive; each returned path is ``folder`` joined
    with the entry name, in ``os.listdir`` order.
    """
    paths = []
    for entry in os.listdir(folder):
        candidate = os.path.join(folder, entry)
        if os.path.isfile(candidate):
            paths.append(candidate)
    return paths
def get_annotations(directory, classes):
    """Collect every image path with its class index, in random order.

    Args:
        directory: root folder holding one sub-folder per class.
        classes: sequence of class folder names; the position of a name
            in the sequence is used as its label.

    Returns:
        A shuffled list of ``(file_path, label)`` tuples.
    """
    files, labels = [], []
    for label, class_name in enumerate(classes):
        class_files = get_file_name(os.path.join(directory, class_name))
        files += class_files
        labels += [label] * len(class_files)
    assert len(files) == len(labels), "图片和标签数量不等"
    # Pair each path with its label, then shuffle the pairs in place.
    annotation = list(zip(files, labels))
    random.shuffle(annotation)
    return annotation
def main(_):
    """Convert the flower folders into a train and an eval TFRecord file.

    The first ``FLAGS.test_size`` shuffled annotations become the eval set,
    the remainder the training set.
    """
    class_names = get_folder_name(FLAGS.directory)
    annotation = get_annotations(FLAGS.directory, class_names)
    split = FLAGS.test_size
    eval_part, train_part = annotation[:split], annotation[split:]
    convert_to_tfrecord(tf.estimator.ModeKeys.TRAIN, train_part)
    convert_to_tfrecord(tf.estimator.ModeKeys.EVAL, eval_part)


if __name__ == "__main__":
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run()
# How to make the learning rate decay: polynomial decay from 0.001 down to
# 5e-6 over 100000 global steps.
global_step = tf.train.get_or_create_global_step()
learning_rate = tf.train.polynomial_decay(
    learning_rate=0.001,
    global_step=global_step,
    decay_steps=100000,
    end_learning_rate=5e-6,
    power=4)
tf.summary.scalar('learning_rate', learning_rate)
# Note: the optimizer receives the decaying tensor, not a constant.
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
# How to set up training.
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
    # The original built two train ops in a row, so the second assignment
    # silently replaced the BatchNorm-aware one. Build a single op instead:
    # running it under the UPDATE_OPS collection also refreshes BatchNorm
    # moving statistics, and the collection is simply empty for a network
    # without BatchNorm, so this form covers both cases.
    # slim.learning.create_train_op(loss, optimizer, global_step) is the
    # slim helper that likewise runs UPDATE_OPS.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())
else:
    train_op = None
# Compute accuracy metrics; labels arrive one-hot encoded, so recover the
# class index once and reuse it for both metrics.
label_ids = tf.argmax(labels, axis=1)
# Top-1 accuracy.
accuracy = tf.metrics.accuracy(label_ids, predictions['classes'], name='accuracy')
# Top-k accuracy (k = 2).
in_top_k = tf.nn.in_top_k(predictions['probabilities'], label_ids, 2)
accuracy_topk = tf.metrics.mean(in_top_k, name='accuracy_topk')
metrics = {
    'test_accuracy': accuracy,
    'test_accuracy_topk': accuracy_topk,
}
# Visualize the training accuracy; index 1 of a tf.metrics pair is its
# update op.
tf.summary.scalar('train_accuracy', accuracy[1])
tf.summary.scalar('train_accuracy_topk', accuracy_topk[1])
| # -*- coding: utf-8 -*- | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import os | |
| import tensorflow as tf | |
# Command-line flags of the training script (name, default, help text).
flags = tf.app.flags
flags.DEFINE_integer('batch_size', 128, 'Batch 大小')
flags.DEFINE_string('data_dir', '/home/haoyu/Datasets/flowers/tfrecords', '数据存放位置')
flags.DEFINE_string('model_dir', '/tmp/flower_model', '模型存放位置')
flags.DEFINE_integer('steps', 500, '训练步数')
flags.DEFINE_integer('classes', 5, '类别数量')
FLAGS = flags.FLAGS

# Estimator modes that have a matching `<mode>.tfrecords` file name.
MODES = [
    tf.estimator.ModeKeys.TRAIN,
    tf.estimator.ModeKeys.EVAL,
    tf.estimator.ModeKeys.PREDICT,
]
def input_fn(mode, batch_size=1):
    """Build the Estimator input pipeline for ``<data_dir>/<mode>.tfrecords``.

    Args:
        mode: one of ``MODES``; selects which record file is read.
        batch_size: number of examples per batch.

    Returns:
        An ``(images, labels)`` pair of tensors taken from a one-shot
        iterator; labels are one-hot with ``FLAGS.classes`` entries.

    Raises:
        ValueError: if ``mode`` is not one of ``MODES``.
        AssertionError: if the record file does not exist.
    """
    if mode not in MODES:
        raise ValueError("Mode 未知")
    tfrecords_file = os.path.join(FLAGS.data_dir, mode + '.tfrecords')
    assert tf.gfile.Exists(tfrecords_file), ('TFRrecords 文件不存在')

    def parser(serialized_example):
        """Decode one serialized Example into ``(image, one_hot_label)``."""
        scalar_int = tf.FixedLenFeature([], tf.int64)
        features = tf.parse_single_example(
            serialized_example,
            features={
                'image/height': scalar_int,
                'image/width': scalar_int,
                'image/depth': scalar_int,
                'image/encoded': tf.FixedLenFeature([], tf.string),
                'image/class/label': scalar_int,
            })
        # Stored image dimensions, needed to undo the flattening.
        image_shape = [
            tf.cast(features[key], tf.int32)
            for key in ('image/height', 'image/width', 'image/depth')
        ]
        # Pixels are decoded as raw float32 bytes and shifted by -0.5.
        pixels = tf.decode_raw(features['image/encoded'], tf.float32)
        image = tf.reshape(pixels, image_shape) - 0.5
        label = tf.cast(features['image/class/label'], tf.int32)
        return image, tf.one_hot(label, FLAGS.classes)

    dataset = tf.contrib.data.TFRecordDataset([tfrecords_file])
    dataset = dataset.map(parser, num_threads=1, output_buffer_size=batch_size)
    dataset = dataset.batch(batch_size)
    # Training loops over the data forever; other modes make a single pass.
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.repeat()
    images, labels = dataset.make_one_shot_iterator().get_next()
    return images, labels
def my_model(inputs, mode):
    """Build a LeNet-style CNN and return its logits.

    Args:
        inputs: image tensor; reshaped to ``[-1, 28, 28, 1]``.
        mode: a ``tf.estimator.ModeKeys`` value; dropout is only active
            in TRAIN mode.

    Returns:
        Logits tensor with ``FLAGS.classes`` units per example.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    images = tf.reshape(inputs, [-1, 28, 28, 1])
    conv1 = tf.layers.conv2d(
        images, filters=32, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv1, pool_size=[2, 2], strides=2)
    conv2 = tf.layers.conv2d(
        pool1, filters=64, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(conv2, pool_size=[2, 2], strides=2)
    # Two stride-2 pools shrink 28x28 to 7x7, with 64 channels.
    flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    hidden = tf.layers.dense(flat, units=1024, activation=tf.nn.relu)
    hidden = tf.layers.dropout(hidden, rate=0.4, training=is_training)
    return tf.layers.dense(hidden, units=FLAGS.classes)
def my_model_fn(features, labels, mode):
    """Estimator ``model_fn``: network, loss, train op and metrics.

    Args:
        features: batch of input images.
        labels: one-hot labels (not used in PREDICT mode).
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    # Show the input images in TensorBoard.
    tf.summary.image('images', features)

    logits = my_model(features, mode)
    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
    }

    # Prediction needs nothing beyond the predictions themselves.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits, scope='loss')
    tf.summary.scalar('train_loss', loss)

    # Only TRAIN mode gets an optimizer step.
    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
        train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())

    # Labels are one-hot; recover the class index for the metrics.
    label_ids = tf.argmax(labels, axis=1)
    accuracy = tf.metrics.accuracy(label_ids, predictions['classes'], name='accuracy')
    in_top_2 = tf.nn.in_top_k(predictions['probabilities'], label_ids, 2)
    accuracy_topk = tf.metrics.mean(in_top_2, name='accuracy_topk')
    metrics = {
        'test_accuracy': accuracy,
        'test_accuracy_topk': accuracy_topk,
    }

    # Visualize training accuracy via the metrics' update ops (index 1).
    tf.summary.scalar('train_accuracy', accuracy[1])
    tf.summary.scalar('train_accuracy_topk', accuracy_topk[1])

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics)
def main(_):
    """Alternate 20 times between training for ``FLAGS.steps`` steps and evaluating."""
    # Tensors logged every 100 iterations, addressed by graph name.
    watched_tensors = {
        'accuracy': 'accuracy/value',
        'accuracy_topk': 'accuracy_topk/value',
        'loss': 'loss/value',
    }
    logging_hook = tf.train.LoggingTensorHook(tensors=watched_tensors, every_n_iter=100)

    mnist_classifier = tf.estimator.Estimator(
        model_fn=my_model_fn,
        model_dir=FLAGS.model_dir)

    def train_input():
        return input_fn(tf.estimator.ModeKeys.TRAIN, FLAGS.batch_size)

    def eval_input():
        return input_fn(tf.estimator.ModeKeys.EVAL)

    rule = "=" * 10
    for _ in range(20):
        # Train.
        mnist_classifier.train(
            input_fn=train_input,
            steps=FLAGS.steps,
            hooks=[logging_hook])
        # Evaluate and print the results.
        print(rule, "Testing", rule)
        eval_results = mnist_classifier.evaluate(input_fn=eval_input)
        print('Evaluation results:\n\t{}'.format(eval_results))
        print("=" * 30)


if __name__ == "__main__":
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run()
# Tiny Yolo
def my_model(inputs, mode):
    """Build the Tiny-Yolo-style classifier from ``tf.layers`` ops.

    Args:
        inputs: image tensor; reshaped to ``[-1, 224, 224, 3]``.
        mode: a ``tf.estimator.ModeKeys`` value; not used by this network,
            kept so the signature matches the other ``my_model`` variants.

    Returns:
        Unscaled logits of shape ``[-1, FLAGS.classes]``.
    """
    def conv(x, filters, kernel, activation=tf.nn.relu):
        """'same'-padded convolution, ReLU unless told otherwise."""
        return tf.layers.conv2d(x, filters, kernel, padding='same', activation=activation)

    def pool(x):
        """2x2 max pooling with stride 2."""
        return tf.layers.max_pooling2d(x, [2, 2], strides=2)

    net = tf.reshape(inputs, [-1, 224, 224, 3])
    net = pool(conv(net, 16, [3, 3]))
    net = pool(conv(net, 32, [3, 3]))
    for filters, kernel in [(16, [1, 1]), (128, [3, 3]), (16, [1, 1]), (128, [3, 3])]:
        net = conv(net, filters, kernel)
    net = pool(net)
    for filters, kernel in [(32, [1, 1]), (256, [3, 3]), (32, [1, 1]), (256, [3, 3])]:
        net = conv(net, filters, kernel)
    net = pool(net)
    for filters, kernel in [(64, [1, 1]), (512, [3, 3]), (64, [1, 1]), (512, [3, 3])]:
        net = conv(net, filters, kernel)
    net = conv(net, 128, [1, 1])
    # Classifier layer: no activation, so the logits handed to the softmax
    # loss can be negative (the original rectified them with ReLU). The
    # width follows FLAGS.classes instead of a hard-coded 5 so it always
    # matches the one-hot labels.
    net = conv(net, FLAGS.classes, [1, 1], activation=None)
    # Global average pooling over the remaining spatial extent.
    _, height, width, _ = net.shape
    net = tf.layers.average_pooling2d(net, [height, width], 1)
    net = tf.reshape(net, [-1, FLAGS.classes])
    return net
# Tiny Yolo
# The learning rate has to be lowered to about 2e-4 for this one to converge.
def my_model(inputs, mode):
    """Build the Tiny-Yolo-style classifier with TF-Slim layers.

    Args:
        inputs: image tensor; reshaped to ``[-1, 224, 224, 3]``.
        mode: a ``tf.estimator.ModeKeys`` value; not used by this network,
            kept so the signature matches the other ``my_model`` variants.

    Returns:
        Unscaled logits of shape ``[-1, FLAGS.classes]``.
    """
    with slim.arg_scope([slim.conv2d], padding='SAME',
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = tf.reshape(inputs, [-1, 224, 224, 3])
        net = slim.conv2d(net, 16, [3, 3])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.conv2d(net, 32, [3, 3])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [(16, [1, 1]), (128, [3, 3]), (16, [1, 1]), (128, [3, 3])])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [(32, [1, 1]), (256, [3, 3]), (32, [1, 1]), (256, [3, 3])])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [(64, [1, 1]), (512, [3, 3]), (64, [1, 1]), (512, [3, 3])])
        net = slim.conv2d(net, 128, [1, 1])
        # Classifier layer without activation; its width follows
        # FLAGS.classes (default 5) instead of a hard-coded 5 so it always
        # matches the one-hot labels.
        net = slim.conv2d(net, FLAGS.classes, [1, 1], activation_fn=None)
        # Global average pooling over the remaining spatial extent.
        _, height, width, _ = net.shape
        net = slim.avg_pool2d(net, [height, width], 1)
        net = tf.reshape(net, [-1, FLAGS.classes])
    return net
# Darknet19
def my_model(inputs, mode):
    """Build the Darknet19-style classifier with TF-Slim layers.

    Every convolution except the final classifier uses batch normalization,
    which runs in training mode only when ``mode`` is TRAIN.

    Args:
        inputs: image tensor; reshaped to ``[-1, 224, 224, 3]``.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        Unscaled logits of shape ``[-1, FLAGS.classes]``.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    with slim.arg_scope([slim.conv2d], padding='SAME',
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'is_training': is_training}):
        net = tf.reshape(inputs, [-1, 224, 224, 3])
        net = slim.conv2d(net, 32, [3, 3])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.conv2d(net, 64, [3, 3])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [
            (128, [3, 3]), (64, [1, 1]), (128, [3, 3])])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [
            (256, [3, 3]), (128, [1, 1]), (256, [3, 3])])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [
            (512, [3, 3]), (256, [1, 1]), (512, [3, 3]), (256, [1, 1]), (512, [3, 3])])
        net = slim.max_pool2d(net, [2, 2])
        net = slim.stack(net, slim.conv2d, [
            (1024, [3, 3]), (512, [1, 1]), (1024, [3, 3]), (512, [1, 1]), (1024, [3, 3])])
        # Classifier layer: override the scope defaults so the logits are
        # neither rectified by ReLU nor batch-normalized before they reach
        # the softmax loss.
        net = slim.conv2d(net, FLAGS.classes, [1, 1],
                          activation_fn=None, normalizer_fn=None)
        # Global average pooling over the remaining spatial extent.
        _, height, width, _ = net.shape
        net = slim.avg_pool2d(net, [height, width], 1)
        net = tf.reshape(net, [-1, FLAGS.classes])
    return net