CUB-200 Image Classification Train Configuration
# Training-specific configuration
RANDOM_SEED : 1.0
SESSION_CONFIG : {
  LOG_DEVICE_PLACEMENT : false,
  PER_PROCESS_GPU_MEMORY_FRACTION : 0.94
}
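# For reference, a minimal sketch (assuming TensorFlow 1.x) of how these two
# keys could map onto a session configuration:
#
#   import tensorflow as tf
#   sess_config = tf.ConfigProto(log_device_placement=False)
#   sess_config.gpu_options.per_process_gpu_memory_fraction = 0.94
#   sess = tf.Session(config=sess_config)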
#################################################
# Dataset Info
NUM_CLASSES : 200
NUM_TRAIN_EXAMPLES : 5394
NUM_TRAIN_ITERATIONS : 24000
BATCH_SIZE : 32
MODEL_NAME : 'inception_v3'
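# With the values above, 24000 iterations at batch size 32 amounts to
# 24000 * 32 / 5394, or roughly 142 passes over the training set.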
# END: Dataset Info
#################################################
# Image Processing and Augmentation
IMAGE_PROCESSING : {
  INPUT_SIZE : 299,
  # 1) First we extract regions from the image
  REGION_TYPE : 'image', # one of 'image' or 'bbox'
  BBOX_CFG : {
    DO_EXPANSION : 1,
    EXPANSION_CFG : {
      WIDTH_EXPANSION_FACTOR : 2.0, # Expand the width by a factor of 2 (centrally)
      HEIGHT_EXPANSION_FACTOR : 2.0, # Expand the height by a factor of 2 (centrally)
    }
  },
  WHOLE_IMAGE_CFG : {},
  # Fraction of time to take a random crop from the image, 0 is never, 1 is always
  DO_RANDOM_CROP : 1,
  RANDOM_CROP_CFG : {
    MIN_AREA : 0.1, # between 0 and 1
    MAX_AREA : 1.0, # between 0 and 1
    MIN_ASPECT_RATIO : 0.75,
    MAX_ASPECT_RATIO : 1.33,
    MAX_ATTEMPTS : 100, # maximum number of attempts to satisfy the above constraints before returning the whole image
  },
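  # A minimal sketch of the kind of op these constraints typically feed
  # (assuming the standard Inception-style distorted crop in TF 1.x):
  #
  #   begin, size, _ = tf.image.sample_distorted_bounding_box(
  #       tf.shape(image),
  #       bounding_boxes=tf.constant([[[0.0, 0.0, 1.0, 1.0]]]),
  #       min_object_covered=0.1,
  #       aspect_ratio_range=(0.75, 1.33),
  #       area_range=(0.1, 1.0),
  #       max_attempts=100,
  #       use_image_if_no_bounding_boxes=True)
  #   cropped = tf.slice(image, begin, size)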
  # Alternatively we can take a central crop from the image
  DO_CENTRAL_CROP : 0, # Fraction of the time to take a central crop, 0 is never, 1 is always
  CENTRAL_CROP_FRACTION : 0.875, # Between 0 and 1, fraction of size to crop
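  # For example, a central crop of this fraction in TF 1.x would be:
  #
  #   cropped = tf.image.central_crop(image, central_fraction=0.875)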
  # 2) We need to resize the extracted regions to feed into the network.
  MAINTAIN_ASPECT_RATIO : false,
  # Avoid slower resize operations (bi-cubic, etc.)
  RESIZE_FAST : false,
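  # A sketch of the resize step (assuming bilinear interpolation is the
  # 'fast' option; that mapping is an assumption, not stated by this config):
  #
  #   resized = tf.image.resize_images(image, [299, 299],
  #                                    method=tf.image.ResizeMethod.BILINEAR)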
  # 3) We can flip the regions
  # Randomly flip the image left/right, 50% chance of flipping
  DO_RANDOM_FLIP_LEFT_RIGHT : true,
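  # In TF 1.x this corresponds to, e.g.:
  #
  #   flipped = tf.image.random_flip_left_right(image)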
  # 4) We can distort the colors of the regions
  # The fraction of the time to distort the color, 0 is never, 1 is always
  DO_COLOR_DISTORTION : 1,
  # Avoids slower ops (random_hue and random_contrast)
  COLOR_DISTORT_FAST : false
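  # A sketch of a color distortion along these lines (the magnitudes follow
  # the common Inception preprocessing and are assumptions, not values taken
  # from this config); the fast variant would skip the last two ops:
  #
  #   image = tf.image.random_brightness(image, max_delta=32.0 / 255.0)
  #   image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
  #   image = tf.image.random_hue(image, max_delta=0.2)
  #   image = tf.image.random_contrast(image, lower=0.5, upper=1.5)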
}
# END: Image Processing and Augmentation
#################################################
# Queues
#
# Number of threads to populate the batch queue
NUM_INPUT_THREADS : 6
# Should the data be shuffled?
SHUFFLE_QUEUE : true
# Capacity of the queue producing batched examples
QUEUE_CAPACITY : 4000
# Minimum size of the queue to ensure good shuffling
QUEUE_MIN : 400
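# A minimal sketch of the batching queue these settings describe (assuming a
# TF 1.x queue-based input pipeline; `image` and `label` here are single
# parsed examples):
#
#   images, labels = tf.train.shuffle_batch(
#       [image, label], batch_size=32, num_threads=6,
#       capacity=4000, min_after_dequeue=400)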
# END: Queues
#################################################
# Learning Rate Parameters
LEARNING_RATE_DECAY_TYPE : 'exponential' # One of "fixed", "exponential", or "polynomial"
INITIAL_LEARNING_RATE : 0.0045
END_LEARNING_RATE : 0.0001 # The minimum end learning rate used by polynomial decay.
LABEL_SMOOTHING : 0.0 # The amount of label smoothing.
LEARNING_RATE_DECAY_FACTOR : 0.94
NUM_EPOCHS_PER_DELAY : 4 # Number of epochs between learning rate decay steps
LEARNING_RATE_STAIRCASE : true
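# For reference, a sketch of how these values could feed TF 1.x's exponential
# decay, with the decay step count derived from epochs per decay:
#
#   decay_steps = int(5394 / 32.0 * 4)  # train examples / batch size * epochs per decay
#   learning_rate = tf.train.exponential_decay(
#       learning_rate=0.0045, global_step=global_step,
#       decay_steps=decay_steps, decay_rate=0.94, staircase=True)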
# END: Learning Rate Parameters
#################################################
# Regularization
#
# The decay to use for the moving average. If 0, the moving average is not computed.
MOVING_AVERAGE_DECAY : 0.9999
# The weight decay on the model weights
WEIGHT_DECAY : 0.00004
BATCHNORM_MOVING_AVERAGE_DECAY : 0.9997
BATCHNORM_EPSILON : 0.001
DROPOUT_KEEP_PROB : 1 # A keep probability of 1 disables dropout
CLIP_GRADIENT_NORM : 0 # If 0, no clipping is performed. Otherwise acts as a threshold to clip the gradients.
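# Sketches of the TF 1.x ops these keys plausibly control (`grads` and
# `clip_norm` stand in for the training loop's gradients and threshold):
#
#   # Moving average of the model weights
#   variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
#   # Gradient clipping, applied only when CLIP_GRADIENT_NORM > 0
#   clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm)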
# End: Regularization
#################################################
# Optimization
#
# The name of the optimizer, one of "adadelta", "adagrad", "adam", "ftrl", "momentum", "sgd" or "rmsprop"
OPTIMIZER : 'rmsprop'
OPTIMIZER_EPSILON : 1.0
# The decay rate for adadelta.
ADADELTA_RHO : 0.95
# Starting value for the AdaGrad accumulators.
ADAGRAD_INITIAL_ACCUMULATOR_VALUE : 0.1
# The exponential decay rate for the 1st moment estimates.
ADAM_BETA1 : 0.9
# The exponential decay rate for the 2nd moment estimates.
ADAM_BETA2 : 0.99
# The learning rate power.
FTRL_LEARNING_RATE_POWER : -0.5
# Starting value for the FTRL accumulators.
FTRL_INITIAL_ACCUMULATOR_VALUE : 0.1
# The FTRL l1 regularization strength.
FTRL_L1 : 0.0
# The FTRL l2 regularization strength.
FTRL_L2 : 0.0
# The momentum for the MomentumOptimizer and RMSPropOptimizer
MOMENTUM : 0.9
# Decay term for RMSProp.
RMSPROP_DECAY : 0.9
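# With OPTIMIZER set to 'rmsprop', the TF 1.x construction would look
# something like this (a sketch, not necessarily the repo's exact call):
#
#   optimizer = tf.train.RMSPropOptimizer(
#       learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)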
# END: Optimization
#################################################
# Saving Models and Summaries
#
# How often, in seconds, to save summaries.
SAVE_SUMMARY_SECS : 30
# How often, in seconds, to save the model
SAVE_INTERVAL_SECS : 300
# The maximum number of recent checkpoint files to keep.
MAX_TO_KEEP : 10
# In addition to keeping the most recent MAX_TO_KEEP checkpoint files,
# you might want to keep one checkpoint file for every N hours of training.
# (A very large value, e.g. TensorFlow's default of 10,000 hours, effectively
# disables this feature.)
KEEP_CHECKPOINT_EVERY_N_HOURS : 1
# The frequency, in global steps, at which the loss and global step are logged.
LOG_EVERY_N_STEPS : 10
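# If this config drives a TF-Slim training loop (an assumption, suggested by
# the 'inception_v3' model name), these keys map naturally onto
# slim.learning.train and tf.train.Saver:
#
#   slim.learning.train(
#       train_op, logdir, number_of_steps=24000, log_every_n_steps=10,
#       save_summaries_secs=30, save_interval_secs=300,
#       saver=tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=1))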
# END: Saving Models and Summaries
#################################################