Skip to content

Instantly share code, notes, and snippets.

@spurra
Created May 12, 2021 07:59
Show Gist options
  • Save spurra/d5b89caccbd614522eb19e6bc3a9e2d9 to your computer and use it in GitHub Desktop.
Save spurra/d5b89caccbd614522eb19e6bc3a9e2d9 to your computer and use it in GitHub Desktop.
# Training configuration for linear evaluation of VISSL's SimCLR implementation for RN50 (ResNet-50).
# Checkpoint settings: storage backend, output directory, save frequency and
# resume behaviour. All keys below belong to the CHECKPOINT mapping.
CHECKPOINT:
  APPEND_DISTR_RUN_ID: false
  AUTO_RESUME: true
  BACKEND: disk
  CHECKPOINT_FREQUENCY: 1
  # -1 presumably disables iteration-based checkpointing -- TODO confirm.
  CHECKPOINT_ITER_FREQUENCY: -1
  DIR: checkpoints
  LATEST_CHECKPOINT_RESUME_FILE_NUM: 1
  OVERWRITE_EXISTING: false
  USE_SYMLINK_CHECKPOINT_FOR_RESUME: false
# ClusterFit settings: clustering backend, cluster count, iteration count and
# the feature source (FEATURES sub-mapping).
CLUSTERFIT:
  CLUSTER_BACKEND: faiss
  # Which features to cluster; dataset and layer names are left empty here.
  FEATURES:
    DATASET_NAME: ''
    DATA_PARTITION: TRAIN
    LAYER_NAME: ''
  NUM_CLUSTERS: 16000
  N_ITER: 50
# Data loading settings. The first four keys apply to the DATA section as a
# whole; TEST and TRAIN each carry their own dataset, label and transform
# configuration.
DATA:
  DDP_BUCKET_CAP_MB: 25
  ENABLE_ASYNC_GPU_COPY: true
  NUM_DATALOADER_WORKERS: 8
  PIN_MEMORY: true
  # Test split: resize to 256, center-crop to 224, tensor conversion, normalize.
  TEST:
    BATCHSIZE_PER_REPLICA: 32
    COLLATE_FUNCTION: default_collate
    COLLATE_FUNCTION_PARAMS: {}
    COPY_DESTINATION_DIR: /tmp/imagenet1k/
    COPY_TO_LOCAL_DISK: false
    DATASET_NAMES:
    - imagenet1k_folder
    # -1 presumably means "use the full dataset" -- TODO confirm.
    DATA_LIMIT: -1
    DATA_LIMIT_SAMPLING:
      IS_BALANCED: false
      SEED: 0
      SKIP_NUM_SAMPLES: 0
    DATA_PATHS: []
    DATA_SOURCES:
    - disk_folder
    DEFAULT_GRAY_IMG_SIZE: 224
    DROP_LAST: false
    ENABLE_QUEUE_DATASET: false
    INPUT_KEY_NAMES:
    - data
    LABEL_PATHS: []
    LABEL_SOURCES:
    - disk_folder
    LABEL_TYPE: standard
    MMAP_MODE: true
    NEW_IMG_PATH_PREFIX: ''
    REMOVE_IMG_PATH_PREFIX: ''
    TARGET_KEY_NAMES:
    - label
    # Each list item is one transform; `name` selects it, other keys are its
    # arguments.
    TRANSFORMS:
    - name: Resize
      size: 256
    - name: CenterCrop
      size: 224
    - name: ToTensor
    - mean:
      - 0.485
      - 0.456
      - 0.406
      name: Normalize
      std:
      - 0.229
      - 0.224
      - 0.225
    USE_DEBUGGING_SAMPLER: false
    USE_STATEFUL_DISTRIBUTED_SAMPLER: false
  # Train split: random resized crop to 224, random horizontal flip, tensor
  # conversion, then the same normalization as the test split.
  TRAIN:
    BATCHSIZE_PER_REPLICA: 32
    COLLATE_FUNCTION: default_collate
    COLLATE_FUNCTION_PARAMS: {}
    COPY_DESTINATION_DIR: /tmp/imagenet1k/
    COPY_TO_LOCAL_DISK: false
    DATASET_NAMES:
    - imagenet1k_folder
    # -1 presumably means "use the full dataset" -- TODO confirm.
    DATA_LIMIT: -1
    DATA_LIMIT_SAMPLING:
      IS_BALANCED: false
      SEED: 0
      SKIP_NUM_SAMPLES: 0
    DATA_PATHS: []
    DATA_SOURCES:
    - disk_folder
    DEFAULT_GRAY_IMG_SIZE: 224
    DROP_LAST: false
    ENABLE_QUEUE_DATASET: false
    INPUT_KEY_NAMES:
    - data
    LABEL_PATHS: []
    LABEL_SOURCES:
    - disk_folder
    LABEL_TYPE: standard
    MMAP_MODE: true
    NEW_IMG_PATH_PREFIX: ''
    REMOVE_IMG_PATH_PREFIX: ''
    TARGET_KEY_NAMES:
    - label
    TRANSFORMS:
    - name: RandomResizedCrop
      size: 224
    - name: RandomHorizontalFlip
    - name: ToTensor
    - mean:
      - 0.485
      - 0.456
      - 0.406
      name: Normalize
      std:
      - 0.229
      - 0.224
      - 0.225
    USE_DEBUGGING_SAMPLER: false
    USE_STATEFUL_DISTRIBUTED_SAMPLER: false
# Distributed-training settings: communication backend, init method and the
# node / process layout (1 node x 1 process here).
DISTRIBUTED:
  BACKEND: nccl
  BROADCAST_BUFFERS: true
  INIT_METHOD: tcp
  MANUAL_GRADIENT_REDUCTION: false
  NCCL_DEBUG: false
  NCCL_SOCKET_NTHREADS: ''
  NUM_NODES: 1
  NUM_PROC_PER_NODE: 1
  RUN_ID: auto
# Hook settings: GPU stats logging, memory summary, model complexity,
# performance statistics and TensorBoard output.
HOOKS:
  LOG_GPU_STATS: true
  MEMORY_SUMMARY:
    LOG_ITERATION_NUM: 0
    PRINT_MEMORY_SUMMARY: true
  MODEL_COMPLEXITY:
    COMPUTE_COMPLEXITY: false
    # Three-element shape; presumably (channels, height, width) -- TODO confirm.
    INPUT_SHAPE:
    - 3
    - 224
    - 224
  PERF_STATS:
    MONITOR_PERF_STATS: true
    PERF_STAT_FREQUENCY: -1
    ROLLING_BTIME_FREQ: -1
  TENSORBOARD_SETUP:
    EXPERIMENT_LOG_DIR: tensorboard
    FLUSH_EVERY_N_MIN: 5
    LOG_DIR: checkpoints
    LOG_PARAMS: true
    LOG_PARAMS_EVERY_N_ITERS: 310
    LOG_PARAMS_GRADIENTS: true
    USE_TENSORBOARD: true
# Image-retrieval evaluation settings (train/eval dataset names, PCA size,
# pooling and resize parameters). Paths are left empty in this config.
IMG_RETRIEVAL:
  DATASET_PATH: ''
  EVAL_BINARY_PATH: ''
  EVAL_DATASET_NAME: Paris
  FEATS_PROCESSING_TYPE: ''
  GEM_POOL_POWER: 4.0
  N_PCA: 512
  RESIZE_IMG: 1024
  SHOULD_TRAIN_PCA_OR_WHITENING: true
  SPATIAL_LEVELS: 3
  TEMP_DIR: /tmp/instance_retrieval/
  TRAIN_DATASET_NAME: Oxford
  WHITEN_IMG_LIST: ''
# Top-level logging interval; the unit is not stated here (presumably training
# iterations -- TODO confirm).
LOG_FREQUENCY: 200
# Loss settings. `name` selects the active loss
# (cross_entropy_multiple_output_single_target); every other key is a
# per-loss parameter mapping.
LOSS:
  CrossEntropyLoss:
    ignore_index: -1
  bce_logits_multiple_output_single_target:
    normalize_output: false
    # The string "none", not a YAML null.
    reduction: none
    world_size: 1
  cross_entropy_multiple_output_single_target:
    ignore_index: -1
    normalize_output: false
    reduction: mean
    temperature: 1.0
    weight: null
  deepclusterv2_loss:
    BATCHSIZE_PER_REPLICA: 256
    DROP_LAST: true
    kmeans_iters: 10
    memory_params:
      crops_for_mb:
      - 0
      embedding_dim: 128
    num_clusters:
    - 3000
    - 3000
    - 3000
    num_crops: 2
    num_train_samples: -1
    temperature: 0.1
  moco_loss:
    embedding_dim: 128
    momentum: 0.999
    queue_size: 65536
    temperature: 0.2
  multicrop_simclr_info_nce_loss:
    buffer_params:
      effective_batch_size: 4096
      embedding_dim: 128
      world_size: 64
    num_crops: 2
    temperature: 0.1
  # Name of the loss to use; matches one of the sibling keys in this mapping.
  name: cross_entropy_multiple_output_single_target
  nce_loss_with_memory:
    loss_type: nce
    loss_weights:
    - 1.0
    memory_params:
      embedding_dim: 128
      memory_size: -1
      momentum: 0.5
      norm_init: true
      update_mem_on_forward: true
    negative_sampling_params:
      num_negatives: 16000
      type: random
    norm_constant: -1
    norm_embedding: true
    num_train_samples: -1
    temperature: 0.07
    update_mem_with_emb_index: -100
  simclr_info_nce_loss:
    buffer_params:
      effective_batch_size: 4096
      embedding_dim: 128
      world_size: 64
    temperature: 0.1
  swav_loss:
    crops_for_assign:
    - 0
    - 1
    embedding_dim: 128
    epsilon: 0.05
    normalize_last_layer: true
    num_crops: 2
    num_iters: 3
    num_prototypes:
    - 3000
    output_dir: .
    queue:
      local_queue_length: 0
      queue_length: 0
      start_iter: 0
    temp_hard_assignment_iters: 0
    temperature: 0.1
    use_double_precision: false
  swav_momentum_loss:
    crops_for_assign:
    - 0
    - 1
    embedding_dim: 128
    epsilon: 0.05
    momentum: 0.99
    momentum_eval_mode_iter_start: 0
    normalize_last_layer: true
    num_crops: 2
    num_iters: 3
    num_prototypes:
    - 3000
    queue:
      local_queue_length: 0
      queue_length: 0
      start_iter: 0
    temperature: 0.1
    use_double_precision: false
# Target device for the run.
MACHINE:
  DEVICE: gpu
# Meter settings. `name` selects the active meter (accuracy_list_meter); the
# other mappings hold per-meter parameters.
METERS:
  # Five meters, one per listed name, each reporting top-1 and top-5.
  accuracy_list_meter:
    meter_names:
    - conv1
    - res2
    - res3
    - res4
    - res5
    num_meters: 5
    topk_values:
    - 1
    - 5
  enable_training_meter: true
  mean_ap_list_meter:
    max_cpu_capacity: -1
    meter_names: []
    num_classes: 9605
    num_meters: 1
  name: accuracy_list_meter
# Model settings: trunk architecture, evaluation heads, feature-evaluation
# behaviour, precision / parallelism options and weight initialization.
MODEL:
  ACTIVATION_CHECKPOINTING:
    NUM_ACTIVATION_CHECKPOINTING_SPLITS: 2
    USE_ACTIVATION_CHECKPOINTING: false
  AMP_PARAMS:
    AMP_ARGS:
      # Letter "O" followed by digit 1 (a string, not a number).
      opt_level: O1
    AMP_TYPE: apex
    USE_AMP: false
  CUDA_CACHE:
    CLEAR_CUDA_CACHE: false
    CLEAR_FREQ: 100
  # Feature-evaluation mode with only the trunk frozen.
  FEATURE_EVAL_SETTINGS:
    EVAL_MODE_ON: true
    EVAL_TRUNK_AND_HEAD: false
    EXTRACT_TRUNK_FEATURES_ONLY: false
    FREEZE_TRUNK_AND_HEAD: false
    FREEZE_TRUNK_ONLY: true
    # One entry per layer: [layer_name, [pool_op, [[a, b], c, d]]].
    # Presumably AvgPool2d arguments (kernel size, stride, padding) -- TODO
    # confirm against the consumer.
    LINEAR_EVAL_FEAT_POOL_OPS_MAP:
    - - conv1
      - - AvgPool2d
        - - - 10
            - 10
          - 10
          - 4
    - - res2
      - - AvgPool2d
        - - - 16
            - 16
          - 8
          - 0
    - - res3
      - - AvgPool2d
        - - - 13
            - 13
          - 5
          - 0
    - - res4
      - - AvgPool2d
        - - - 8
            - 8
          - 3
          - 0
    - - res5
      - - AvgPool2d
        - - - 6
            - 6
          - 1
          - 0
    SHOULD_FLATTEN_FEATS: false
  FSDP_CONFIG:
    compute_dtype: float32
    flatten_parameters: true
    fp32_reduce_scatter: false
    mixed_precision: true
  GRAD_CLIP:
    MAX_NORM: 1
    NORM_TYPE: 2
    USE_GRAD_CLIP: false
  HEAD:
    BATCHNORM_EPS: 1.0e-05
    BATCHNORM_MOMENTUM: 0.1
    # One [head_name, params] pair per entry; five entries, the same count as
    # LINEAR_EVAL_FEAT_POOL_OPS_MAP (presumably one head per pooled layer, in
    # the same order -- TODO confirm).
    PARAMS:
    - - eval_mlp
      - dims:
        - 9216
        - 1000
        in_channels: 64
    - - eval_mlp
      - dims:
        - 9216
        - 1000
        in_channels: 256
    - - eval_mlp
      - dims:
        - 8192
        - 1000
        in_channels: 512
    - - eval_mlp
      - dims:
        - 9216
        - 1000
        in_channels: 1024
    - - eval_mlp
      - dims:
        - 8192
        - 1000
        in_channels: 2048
    PARAMS_MULTIPLIER: 1.0
  INPUT_TYPE: rgb
  MULTI_INPUT_HEAD_MAPPING: []
  NON_TRAINABLE_PARAMS: []
  SHARDED_DDP_SETUP:
    USE_SDP: false
    reduce_buffer_size: -1
  SINGLE_PASS_EVERY_CROP: false
  SYNC_BN_CONFIG:
    CONVERT_BN_TO_SYNC_BN: true
    GROUP_SIZE: 8
    SYNC_BN_TYPE: pytorch
  TEMP_FROZEN_PARAMS_ITER_MAP: []
  # NAME selects the trunk family (resnet); the sibling mappings hold
  # per-family parameters.
  TRUNK:
    CONVIT:
      CLASS_TOKEN_IN_LOCAL_LAYERS: false
      LOCALITY_DIM: 10
      LOCALITY_STRENGTH: 1.0
      N_GPSA_LAYERS: 10
      USE_LOCAL_INIT: true
    EFFICIENT_NETS: {}
    NAME: resnet
    REGNET: {}
    RESNETS:
      DEPTH: 50
      GROUPNORM_GROUPS: 32
      GROUPS: 1
      LAYER4_STRIDE: 2
      NORM: BatchNorm
      STANDARDIZE_CONVOLUTIONS: false
      WIDTH_MULTIPLIER: 1
      WIDTH_PER_GROUP: 64
      ZERO_INIT_RESIDUAL: false
    VISION_TRANSFORMERS:
      ATTENTION_DROPOUT_RATE: 0
      CLASSIFIER: token
      DROPOUT_RATE: 0
      DROP_PATH_RATE: 0
      HIDDEN_DIM: 768
      IMAGE_SIZE: 224
      MLP_DIM: 3072
      NUM_HEADS: 12
      NUM_LAYERS: 12
      PATCH_SIZE: 16
      QKV_BIAS: false
      QK_SCALE: false
      # NOTE(review): placed under VISION_TRANSFORMERS based on key ordering
      # in the dump -- confirm against the consumer's schema.
      name: null
  # Weights loaded into the model at start-up.
  WEIGHTS_INIT:
    APPEND_PREFIX: ''
    PARAMS_FILE: model_zoo/model_final_checkpoint_phase99.torch
    REMOVE_PREFIX: ''
    SKIP_LAYERS:
    - num_batches_tracked
    STATE_DICT_KEY_NAME: classy_state_dict
  _MODEL_INIT_SEED: 1
# Monitoring settings; activation statistics are set to 0 here (presumably
# disabled -- TODO confirm).
MONITORING:
  MONITOR_ACTIVATION_STATISTICS: 0
# Top-level multiprocessing start method setting.
MULTI_PROCESSING_METHOD: forkserver
# Nearest-neighbour evaluation settings.
NEAREST_NEIGHBOR:
  L2_NORM_FEATS: false
  SIGMA: 0.1
  TOPK: 200
# Optimizer settings: SGD with Nesterov momentum for 28 epochs, with a
# multistep learning-rate schedule.
OPTIMIZER:
  betas:
  - 0.9
  - 0.999
  construct_single_param_group_only: false
  head_optimizer_params:
    use_different_lr: false
    use_different_wd: false
    weight_decay: 0.0005
  larc_config:
    clip: false
    eps: 1.0e-08
    trust_coefficient: 0.001
  momentum: 0.9
  name: sgd
  nesterov: true
  non_regularized_parameters: []
  num_epochs: 28
  param_schedulers:
    # `lr` defines the anchors (&id001..&id005) that `lr_head` re-uses through
    # aliases, so both schedules share the same milestones and values.
    lr:
      auto_lr_scaling:
        auto_scale: true
        base_lr_batch_size: 256
        base_value: 0.01
        scaling_type: linear
      end_value: 0.0
      interval_scaling: &id001 []
      lengths: &id002 []
      milestones: &id003
      - 8
      - 16
      - 24
      name: multistep
      schedulers: &id004 []
      start_value: 0.1
      update_interval: epoch
      value: 0.1
      # Four values for three milestones: one per interval between milestones.
      values: &id005
      - 0.00125
      - 0.000125
      - 1.25e-05
      - 1.25e-06
    lr_head:
      auto_lr_scaling:
        auto_scale: true
        base_lr_batch_size: 256
        base_value: 0.01
        scaling_type: linear
      end_value: 0.0
      interval_scaling: *id001
      lengths: *id002
      milestones: *id003
      name: multistep
      schedulers: *id004
      start_value: 0.1
      update_interval: epoch
      value: 0.1
      values: *id005
  regularize_bias: true
  regularize_bn: false
  use_larc: false
  use_zero: false
  weight_decay: 0.0005
# Profiling settings: which ranks to profile, for how many iterations, and
# where to write the output.
PROFILING:
  MEMORY_PROFILING:
    TRACK_BY_LAYER_MEMORY: false
  NUM_ITERATIONS: 10
  OUTPUT_FOLDER: .
  PROFILED_RANKS:
  - 0
  - 1
  START_ITERATION: 0
# Reproducibility settings: determinism flag and seed value.
# NOTE(review): the key is spelled CUDDN_DETERMINISTIC in the source; kept
# as-is because the consumer presumably reads this exact spelling.
REPRODUCIBILITY:
  CUDDN_DETERMINISTIC: false
  SEED_VALUE: 1
# SLURM job settings. USE_SLURM is false, so these presumably have no effect
# on this run -- TODO confirm.
SLURM:
  ADDITIONAL_PARAMETERS: {}
  COMMENT: vissl job
  CONSTRAINT: ''
  LOG_FOLDER: .
  MEM_GB: 250
  NAME: vissl
  NUM_CPU_PER_PROC: 8
  PARTITION: learnfair
  PORT_ID: 40050
  TIME_HOURS: 72
  TIME_MINUTES: 0
  USE_SLURM: false
# SVM evaluation settings: cost grid, cross-validation folds, solver options
# and low-shot sampling configuration.
SVM:
  cls_list: []
  costs:
    base: -1.0
    costs_list:
    - 0.1
    - 0.01
    power_range:
    - 4
    - 20
  cross_val_folds: 3
  dual: true
  force_retrain: false
  loss: squared_hinge
  low_shot:
    dataset_name: voc
    k_values:
    - 1
    - 2
    - 4
    - 8
    - 16
    - 32
    - 64
    - 96
    sample_inds:
    - 1
    - 2
    - 3
    - 4
    - 5
  max_iter: 2000
  normalize: true
  penalty: l2
# Top-level test toggles: testing is enabled (TEST_MODEL), the run is not
# test-only (TEST_ONLY), and the test cadence is 1 (presumably one test pass
# per training epoch, going by the key name -- TODO confirm).
TEST_EVERY_NUM_EPOCH: 1
TEST_MODEL: true
TEST_ONLY: false
# Trainer settings: names of the task and the train step to use.
TRAINER:
  TASK_NAME: self_supervision_task
  TRAIN_STEP_NAME: standard_train_step
# Top-level verbosity flag.
VERBOSE: true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment