Last active
September 21, 2018 12:44
-
-
Save gvanhorn38/44414ef7a7a9fcada8a3cdb832061a30 to your computer and use it in GitHub Desktop.
CUB-200 Image Classification Train Configuration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training specific configuration

# Seed for random number generation.
# NOTE(review): written as a float (1.0); seeds are conventionally integers.
# Confirm the consumer accepts a float before changing it.
RANDOM_SEED: 1.0

# Session-level settings.
# NOTE(review): key names mirror TensorFlow session options; presumably
# forwarded to the session config by the training code -- confirm.
SESSION_CONFIG:
  LOG_DEVICE_PLACEMENT: false
  PER_PROCESS_GPU_MEMORY_FRACTION: 0.94
#################################################
# Dataset Info

# Number of target classes (200 for CUB-200).
NUM_CLASSES: 200

# Number of examples in the training split.
NUM_TRAIN_EXAMPLES: 5394

# Total number of training iterations to run.
NUM_TRAIN_ITERATIONS: 24000

# Number of examples per batch.
BATCH_SIZE: 32

# Name of the network architecture to train.
MODEL_NAME: 'inception_v3'

# END: Dataset Info
#################################################
# Image Processing and Augmentation
#
# The numbered comments below follow the processing order:
# 1) region extraction, 2) resizing, 3) flipping, 4) color distortion.
IMAGE_PROCESSING:
  # NOTE(review): presumably the side length, in pixels, of the network
  # input that extracted regions are resized to -- confirm.
  INPUT_SIZE: 299

  # 1) First we extract regions from the image
  REGION_TYPE: 'image'  # one of 'image' or 'bbox'

  # NOTE(review): presumably only consulted when REGION_TYPE is 'bbox'.
  BBOX_CFG:
    DO_EXPANSION: 1
    EXPANSION_CFG:
      # Expand the width by a factor of 2 (centrally)
      WIDTH_EXPANSION_FACTOR: 2.0
      # Expand the height by a factor of 2 (centrally)
      HEIGHT_EXPANSION_FACTOR: 2.0

  WHOLE_IMAGE_CFG: {}

  # Fraction of time to take a random crop from the image,
  # 0 is never, 1 is always
  DO_RANDOM_CROP: 1
  RANDOM_CROP_CFG:
    MIN_AREA: 0.1  # between 0 and 1
    MAX_AREA: 1.0  # between 0 and 1
    MIN_ASPECT_RATIO: 0.75
    MAX_ASPECT_RATIO: 1.33
    # Maximum number of attempts to satisfy the above constraints before
    # returning the whole image
    MAX_ATTEMPTS: 100

  # Alternatively we can take a central crop from the image.
  # Fraction of the time to take a central crop, 0 is never, 1 is always
  DO_CENTRAL_CROP: 0
  # Between 0 and 1, fraction of size to crop
  CENTRAL_CROP_FRACTION: 0.875

  # 2) We need to resize the extracted regions to feed into the network.
  MAINTAIN_ASPECT_RATIO: false
  # Avoid slower resize operations (bi-cubic, etc.)
  RESIZE_FAST: false

  # 3) We can flip the regions
  # Randomly flip the image left right, 50% chance of flipping
  DO_RANDOM_FLIP_LEFT_RIGHT: true

  # 4) We can distort the colors of the regions
  # The fraction of time to distort the color, 0 is never, 1 is always
  DO_COLOR_DISTORTION: 1
  # Avoids slower ops (random_hue and random_contrast)
  COLOR_DISTORT_FAST: false

# END: Image Processing and Augmentation
#################################################
# Queues
#
# Settings for the input queue that feeds batches to training.

# Number of threads to populate the batch queue
NUM_INPUT_THREADS: 6

# Should the data be shuffled?
SHUFFLE_QUEUE: true

# Capacity of the queue producing batched examples
QUEUE_CAPACITY: 4000

# Minimum size of the queue to ensure good shuffling
QUEUE_MIN: 400

# END: Queues
#################################################
# Learning Rate Parameters

# One of "fixed", "exponential", or "polynomial"
LEARNING_RATE_DECAY_TYPE: 'exponential'

# Learning rate at the start of training.
INITIAL_LEARNING_RATE: 0.0045

# The minimal end learning rate used by a polynomial decay learning rate.
END_LEARNING_RATE: 0.0001

# The amount of label smoothing.
LABEL_SMOOTHING: 0.0

# Multiplicative factor applied to the learning rate at each decay.
LEARNING_RATE_DECAY_FACTOR: 0.94

# NOTE(review): "DELAY" looks like a typo for "DECAY" (presumably the number
# of epochs between learning rate decays), but the key must match whatever
# the training code reads -- do not rename without checking the consumer.
NUM_EPOCHS_PER_DELAY: 4

# NOTE(review): presumably selects stepwise (staircase) rather than
# continuous decay -- confirm against the training code.
LEARNING_RATE_STAIRCASE: true

# END: Learning Rate Parameters
#################################################
# Regularization
#
# The decay to use for the moving average.
# If 0, then moving average is not computed
MOVING_AVERAGE_DECAY: 0.9999

# The weight decay on the model weights
WEIGHT_DECAY: 0.00004

# Batch normalization settings: moving-average decay and epsilon.
BATCHNORM_MOVING_AVERAGE_DECAY: 0.9997
BATCHNORM_EPSILON: 0.001

# Dropout keep probability; set to 1 here.
DROPOUT_KEEP_PROB: 1

# If 0, no clipping is performed.
# Otherwise acts as a threshold to clip the gradients.
CLIP_GRADIENT_NORM: 0

# END: Regularization
#################################################
# Optimization
#
# Optimizer-specific keys below are prefixed with the optimizer they belong
# to. NOTE(review): presumably only the keys relevant to the selected
# OPTIMIZER are read -- confirm against the training code.

# The name of the optimizer, one of "adadelta", "adagrad", "adam", "ftrl",
# "momentum", "sgd" or "rmsprop"
OPTIMIZER: 'rmsprop'

# Epsilon term passed to the optimizer.
OPTIMIZER_EPSILON: 1.0

# The decay rate for adadelta.
ADADELTA_RHO: 0.95

# Starting value for the AdaGrad accumulators.
ADAGRAD_INITIAL_ACCUMULATOR_VALUE: 0.1

# The exponential decay rate for the 1st moment estimates.
ADAM_BETA1: 0.9

# The exponential decay rate for the 2nd moment estimates.
ADAM_BETA2: 0.99

# The learning rate power.
FTRL_LEARNING_RATE_POWER: -0.5

# Starting value for the FTRL accumulators.
FTRL_INITIAL_ACCUMULATOR_VALUE: 0.1

# The FTRL l1 regularization strength.
FTRL_L1: 0.0

# The FTRL l2 regularization strength.
FTRL_L2: 0.0

# The momentum for the MomentumOptimizer and RMSPropOptimizer
MOMENTUM: 0.9

# Decay term for RMSProp.
RMSPROP_DECAY: 0.9

# END: Optimization
#################################################
# Saving Models and Summaries
#
# How often, in seconds, to save summaries.
SAVE_SUMMARY_SECS: 30

# How often, in seconds, to save the model
SAVE_INTERVAL_SECS: 300

# The maximum number of recent checkpoint files to keep.
MAX_TO_KEEP: 10

# In addition to keeping the most recent `max_to_keep` checkpoint files,
# you might want to keep one checkpoint file for every N hours of training.
# The default value of 10,000 hours effectively disables the feature.
# Set to 1 here.
KEEP_CHECKPOINT_EVERY_N_HOURS: 1

# The frequency, in terms of global steps, that the loss and global step
# are logged.
LOG_EVERY_N_STEPS: 10

# END: Saving Models and Summaries
#################################################
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment