Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Command to run:
NCCL_DEBUG=WARN CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 ./scripts/dist_run_single.sh 6 contrastive_pretrain/train_end2end.py ./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml ./checkpoints_debug2 | tee debug2.txt
Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)
Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)
Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)
{'CHECKPOINT_FREQUENT': 1,
'DATASET': {'CHECKPOINT_FREQUENT'{: {1,
'DATASET': 'CHECKPOINT_FREQUENT': 1,
'DATASET': 'ADD_IMAGE_AS_A_BOX': True,
'ANSWER_VOCAB_FILE': '',
'ANSWER_VOCAB_SIZE': 3129,
'APPEND_INDEX': False,
'BASIC_ALIGN': {False,
'CACHE_MODE': False,
'DATASET': 'movienet',
'DATASET_PATH': '/proj/vondrick/datasets/MovieNet',
'IGNORE_DB_CACHE': False{,
'LABEL_INDEX_IN_BATCH': -1,
'MASK_SIZE': 'ADD_IMAGE_AS_A_BOX'14: ,
'MIN_SEQ_LEN'True: ,
0'ANSWER_VOCAB_FILE',
: 'ONLY_USE_RELEVANT_DETS': '',
True,
'ANSWER_VOCAB_SIZE': 'QA2R_AUG': 3129,
False,
'APPEND_INDEX': 'QA2R_NOQ': 'ADD_IMAGE_AS_A_BOX'False: False,
,
'BASIC_ALIGN'True'ROOT_PATH': ,
: False'ANSWER_VOCAB_FILE''./',
: ,
'CACHE_MODE''SEQ_LEN''': : ,
False64,
'ANSWER_VOCAB_SIZE',
: 'DATASET''TASK': : 3129,
'movienet''Q2AR',
'APPEND_INDEX',
: 'DATASET_PATH''TEST_ANNOTATION_FILE': : False,
'/proj/vondrick/datasets/MovieNet''',
,
'BASIC_ALIGN': 'IGNORE_DB_CACHE''TEST_IMAGE_SET': : FalseFalse,
'val',
,
'CACHE_MODE''LABEL_INDEX_IN_BATCH': 'TRAIN_ANNOTATION_FILE': : False-1,
'',
,
'DATASET''MASK_SIZE': 'TRAIN_IMAGE_SET': : 'movienet'14,
'train',
,
'DATASET_PATH''MIN_SEQ_LEN': 'VAL_ANNOTATION_FILE': : '/proj/vondrick/datasets/MovieNet'0,
'',
,
'IGNORE_DB_CACHE''ONLY_USE_RELEVANT_DETS': 'VAL_IMAGE_SET': : FalseTrue,
'val',
,
'LABEL_INDEX_IN_BATCH''QA2R_AUG': 'ZIP_MODE': : -1False,
False,
'MASK_SIZE'}'QA2R_NOQ': : ,
14False'GPUS',
,
: 'MIN_SEQ_LEN''ROOT_PATH': '0': ,
0Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)'./',
'LOG_FREQUENT',
: 'ONLY_USE_RELEVANT_DETS'
'SEQ_LEN': 100: ,
True64'MODEL_PREFIX',
,
: Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)'QA2R_AUG''vl-bert_base_res101_pretrain''TASK': ,
:
False'MODULE''Q2AR',
: ,
'QA2R_NOQ''ResNetVLBERTForPretraining''TEST_ANNOTATION_FILE': ,
: False'NETWORK''',
: ,
'ROOT_PATH''TEST_IMAGE_SET': : './''val',
,
'TRAIN_ANNOTATION_FILE': 'SEQ_LEN': ''64,
,
'TRAIN_IMAGE_SET''TASK': : 'train''Q2AR',
,
'VAL_ANNOTATION_FILE''TEST_ANNOTATION_FILE': : '''',
,
'VAL_IMAGE_SET''TEST_IMAGE_SET': : 'val''val',
,
'ZIP_MODE''TRAIN_ANNOTATION_FILE': : False''},
,
'TRAIN_IMAGE_SET': 'GPUS': 'train',
'0''VAL_ANNOTATION_FILE',
: 'LOG_FREQUENT''': ,
100Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)'VAL_IMAGE_SET',
: 'MODEL_PREFIX'
'val': ,
'vl-bert_base_res101_pretrain''ZIP_MODE',
: 'MODULE'False: }'ResNetVLBERTForPretraining',
,
'GPUS''NETWORK': : '0',
'LOG_FREQUENT': 100,
'MODEL_PREFIX': 'vl-bert_base_res101_pretrain',
'MODULE': 'ResNetVLBERTForPretraining',
'NETWORK': {'ANS_LOSS_WEIGHT': 1.0,
'BERT_ALIGN_ANSWER': {True,
'BERT_ALIGN_QUESTION': True,
'BERT_FROZEN': False,
'BERT_MODEL_NAME': './model/pretrained_model/bert-base-uncased',
'BERT_PRETRAINED': {'',
'BERT_PRETRAINED_EPOCH': 0,
'BERT_USE_LAYER': -2,
'BERT_WITH_MLM_LOSS': False,
'BERT_WITH_NSP_LOSS': False,
'BLIND': False,
'CLASSIFIER_DROPOUT': 0.1,
'CLASSIFIER_HIDDEN_SIZE': 1024,
'ANS_LOSS_WEIGHT''CLASSIFIER_SIGMOID': : False1.0,
,
'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT''BERT_ALIGN_ANSWER': : 1.0True,
,
'CLASSIFIER_TYPE''BERT_ALIGN_QUESTION': : '2fc'True,
,
'CNN_LOSS_WEIGHT''BERT_FROZEN': : 'ANS_LOSS_WEIGHT'1.0False: ,
,
'ENABLE_CNN_REG_LOSS'1.0'BERT_MODEL_NAME': ,
: False'BERT_ALIGN_ANSWER',
: './model/pretrained_model/bert-base-uncased''FOR_MASK_VL_MODELING_PRETRAIN',
: True'BERT_PRETRAINED',
False: ,
'BERT_ALIGN_QUESTION''': 'IMAGE_C5_DILATED',
: True'BERT_PRETRAINED_EPOCH',
True: ,
'BERT_FROZEN'0: 'IMAGE_FEAT_PRECOMPUTED',
: False'BERT_USE_LAYER',
False: 'BERT_MODEL_NAME',
: -2'IMAGE_FINAL_DIM',
'./model/pretrained_model/bert-base-uncased': ,
'BERT_WITH_MLM_LOSS'768: 'BERT_PRETRAINED',
: False'IMAGE_FROZEN_BACKBONE_STAGES',
'': ,
'BERT_WITH_NSP_LOSS': 'BERT_PRETRAINED_EPOCH': False[1, 2],
,
0'BLIND',
'IMAGE_FROZEN_BN': : 'BERT_USE_LAYER': FalseTrue,
,
-2'CLASSIFIER_DROPOUT',
'IMAGE_NUM_LAYERS': : 'BERT_WITH_MLM_LOSS': 0.1101,
,
False,
'CLASSIFIER_HIDDEN_SIZE''IMAGE_PRETRAINED': : 'BERT_WITH_NSP_LOSS': 1024'',
False,
,
'CLASSIFIER_SIGMOID''IMAGE_PRETRAINED_EPOCH': 'BLIND': : False0,
False,
,
'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT''IMAGE_SEMANTIC': 'CLASSIFIER_DROPOUT': : 1.0False,
0.1,
,
'CLASSIFIER_TYPE''IMAGE_STRIDE_IN_1x1': 'CLASSIFIER_HIDDEN_SIZE': : '2fc'True,
1024,
,
'CNN_LOSS_WEIGHT''MASK_RAW_PIXELS': 'CLASSIFIER_SIGMOID': : 1.0True,
False,
,
'ENABLE_CNN_REG_LOSS''MLM_LOSS_NORM_IN_BATCH_FIRST': 'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT': : FalseFalse,
,
1.0'FOR_MASK_VL_MODELING_PRETRAIN',
'MVRC_LOSS_NORM_IN_BATCH_FIRST': : 'CLASSIFIER_TYPE'False: False,
,
'2fc''IMAGE_C5_DILATED',
'NO_GROUNDING': : 'CNN_LOSS_WEIGHT'True: False,
,
1.0'IMAGE_FEAT_PRECOMPUTED',
'NO_OBJ_ATTENTION': : 'ENABLE_CNN_REG_LOSS'False: False,
,
False'IMAGE_FINAL_DIM',
'OUTPUT_CONV5': : 'FOR_MASK_VL_MODELING_PRETRAIN'768: False,
,
False'IMAGE_FROZEN_BACKBONE_STAGES',
'PARTIAL_PRETRAIN': : 'IMAGE_C5_DILATED': True[1, 2],
,
{'IMAGE_FEAT_PRECOMPUTED''IMAGE_FROZEN_BN'{: : FalseTrue,
,
'IMAGE_FINAL_DIM''IMAGE_NUM_LAYERS': : 768101,
,
'IMAGE_FROZEN_BACKBONE_STAGES''IMAGE_PRETRAINED': : '',
[1, 2]'IMAGE_PRETRAINED_EPOCH',
: 'IMAGE_FROZEN_BN': 'CHECKPOINT_FREQUENT'0'CHECKPOINT_FREQUENT': ,
: True'IMAGE_SEMANTIC',
1: 'IMAGE_NUM_LAYERS'1,
: False,
'DATASET',
101: ,
'DATASET''IMAGE_STRIDE_IN_1x1''IMAGE_PRETRAINED': : : True'',
,
'MASK_RAW_PIXELS''IMAGE_PRETRAINED_EPOCH': : True0,
,
'MLM_LOSS_NORM_IN_BATCH_FIRST''IMAGE_SEMANTIC'{: : FalseFalse,
,
'MVRC_LOSS_NORM_IN_BATCH_FIRST''IMAGE_STRIDE_IN_1x1': : FalseTrue,
,
'NO_GROUNDING''MASK_RAW_PIXELS': : FalseTrue,
,
'NO_OBJ_ATTENTION''MLM_LOSS_NORM_IN_BATCH_FIRST': : False'/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model'False,
'CHECKPOINT_FREQUENT',
,
'OUTPUT_CONV5': 'MVRC_LOSS_NORM_IN_BATCH_FIRST': 'PARTIAL_PRETRAIN_PREFIX_CHANGES': : False1False,
,
[],
'PARTIAL_PRETRAIN',
: 'NO_GROUNDING''DATASET''PIXEL_MEANS': : {: False{,
'NO_OBJ_ATTENTION': [102.9801, 115.9465, 122.7717]False,
,
'OUTPUT_CONV5''PIXEL_STDS': : False,
'PARTIAL_PRETRAIN'[1.0, 1.0, 1.0]: ,
'VLBERT': 'ADD_IMAGE_AS_A_BOX': 'ADD_IMAGE_AS_A_BOX'True: ,
'ANSWER_VOCAB_FILE'True: ,
'''ANSWER_VOCAB_FILE',
: 'ANSWER_VOCAB_SIZE''': ,
3129'ANSWER_VOCAB_SIZE': {3129,
'APPEND_INDEX': False,
'BASIC_ALIGN': False,
'/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model''CACHE_MODE',
,
: {'PARTIAL_PRETRAIN_PREFIX_CHANGES'False: ,
'DATASET'[]: ,
'movienet''PIXEL_MEANS',
: 'ADD_IMAGE_AS_A_BOX''DATASET_PATH': : '/proj/vondrick/datasets/MovieNet'True'/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model',
[102.9801, 115.9465, 122.7717],
,
,
'IGNORE_DB_CACHE''ANSWER_VOCAB_FILE''PIXEL_STDS''PARTIAL_PRETRAIN_PREFIX_CHANGES': : : : False'',
,
[]'attention_probs_dropout_prob''LABEL_INDEX_IN_BATCH'[1.0, 1.0, 1.0],
'ANSWER_VOCAB_SIZE': 'APPEND_INDEX': : ,
'PIXEL_MEANS'0.13129: ,
: -1'VLBERT',
'from_scratch': : ,
[102.9801, 115.9465, 122.7717]'APPEND_INDEX'False,
'MASK_SIZE': ,
: 'PIXEL_STDS''hidden_act'FalseFalse: 14: ,
,
,
'gelu''BASIC_ALIGN'[1.0, 1.0, 1.0],
'MIN_SEQ_LEN': ,
'hidden_dropout_prob''BASIC_ALIGN': : False'VLBERT': ,
0.1: 0,
'CACHE_MODE',
False: 'hidden_size''ONLY_USE_RELEVANT_DETS',
: : FalseTrue,
768,
'CACHE_MODE',
'DATASET': 'QA2R_AUG''initializer_range': : : 'movienet'FalseFalse0.02,
,
,
{,
'DATASET_PATH''QA2R_NOQ''input_size': : : 'DATASET''/proj/vondrick/datasets/MovieNet'False1280: ,
,
,
'IGNORE_DB_CACHE''ROOT_PATH''input_transform_type': : : 'movienet'False1'./',
,
,
,
'LABEL_INDEX_IN_BATCH''intermediate_size': 'SEQ_LEN': 'DATASET_PATH': -13072: ,
{,
64'max_position_embeddings',
'MASK_SIZE': 'attention_probs_dropout_prob': '/proj/vondrick/datasets/MovieNet''TASK'14: 512: ,
,
,
0.1'MIN_SEQ_LEN''Q2AR''num_attention_heads',
: ,
: 'IGNORE_DB_CACHE''from_scratch''TEST_ANNOTATION_FILE'12: ,
0: : 'num_hidden_layers',
False'',
: False'ONLY_USE_RELEVANT_DETS'12,
: ,
'hidden_act': ,
'attention_probs_dropout_prob''TEST_IMAGE_SET''obj_pos_id_relative'True: 'LABEL_INDEX_IN_BATCH',
'gelu': : True: ,
,
'val''QA2R_AUG'0.1'hidden_dropout_prob''object_word_embed_mode',
: ,
: -1: 'from_scratch''TRAIN_ANNOTATION_FILE',
: False0.1: 2,
'',
,
'MASK_SIZE'False'pos_embedding_frozen''QA2R_NOQ',
'hidden_size',
: : 'TRAIN_IMAGE_SET': 768: : 'hidden_act': FalseFalse,
'train'14'initializer_range''gelu',
: ,
,
,
,
'VAL_ANNOTATION_FILE''position_padding_idx'0.02'hidden_dropout_prob''ROOT_PATH',
'MIN_SEQ_LEN': : : : : 'input_size''': -10.1'./',
,
,
'type_vocab_size'01280,
'hidden_size',
: ,
'VAL_IMAGE_SET'3: : ,
'SEQ_LEN''val''input_transform_type'768'ONLY_USE_RELEVANT_DETS''visual_ln',
: ,
: : 64: 1True,
,
,
'initializer_range''ZIP_MODE': 'intermediate_size''TASK''visual_region_classes': : : True: 0.02False'Q2AR'81,
3072,
,
,
}'TEST_ANNOTATION_FILE''visual_scale_object_init',
'input_size': : 'QA2R_AUG''',
1280: 'max_position_embeddings',
'GPUS'0.0,
: : 'TEST_IMAGE_SET''input_transform_type': ,
: 512'0': 'val'False1'visual_scale_text_init',
,
: ,
,
'TRAIN_ANNOTATION_FILE'0.0'LOG_FREQUENT''intermediate_size': : ,
'num_attention_heads',
: : ''3072'visual_size'100,
12,
,
'QA2R_NOQ',
'MODEL_PREFIX': 'max_position_embeddings''num_hidden_layers': 'TRAIN_IMAGE_SET': : : 768: False,
'vl-bert_base_res101_pretrain'51212'vocab_size',
'train',
'obj_pos_id_relative',
,
'MODULE': ,
: 'num_attention_heads': 30522'VAL_ANNOTATION_FILE''ROOT_PATH''ResNetVLBERTForPretraining': True: ,
,
: ,
'NETWORK''with_pooler'12'object_word_embed_mode''': './',
: : ,
False'VAL_IMAGE_SET',
,
: 'num_hidden_layers''word_embedding_frozen'2: : ,
12'val''SEQ_LEN'False'pos_embedding_frozen',
,
}: : 'obj_pos_id_relative''ZIP_MODE',
: False: ,
'WITH_CONT_LOSS'True64False'position_padding_idx': ,
: ,
}'object_word_embed_mode'-1False: ,
'TASK',
,
'type_vocab_size'2: 'WITH_MLM_LOSS''GPUS': : ,
: True'pos_embedding_frozen''Q2AR',
: 3'0',
False'WITH_MVRC_LOSS',
,
,
'visual_ln': 'TEST_ANNOTATION_FILE''position_padding_idx': 'LOG_FREQUENT': False: True: ,
,
-1100'WITH_REL_LOSS',
'visual_region_classes',
: : ''False'type_vocab_size''MODEL_PREFIX'}81: ,
: ,
,
3,
'visual_scale_object_init''NUM_WORKERS_PER_GPU''TEST_IMAGE_SET': 'visual_ln': : : 0.05,
True'vl-bert_base_res101_pretrain',
,
'visual_scale_text_init',
'OUTPUT_PATH''val''visual_region_classes': : 'MODULE': ,
: 0.081,
,
'ResNetVLBERTForPretraining''TRAIN_ANNOTATION_FILE''visual_scale_object_init''visual_size',
: : : './checkpoints_debug2/./output/vl-bert/contrastive_random_images''NETWORK'0.0,
768,
: '''RNG_SEED',
'visual_scale_text_init': : 'vocab_size',
: 123450.0,
,
30522,
'SCALES''visual_size''TRAIN_IMAGE_SET': : 'with_pooler': : 768,
False,
[600, 1000]'vocab_size',
: 'word_embedding_frozen''train''TEST': 30522: ,
,
False'with_pooler'}: ,
'VAL_ANNOTATION_FILE'False,
'WITH_CONT_LOSS': : 'word_embedding_frozen': False{False,
''}'WITH_MLM_LOSS': ,
,
True'WITH_CONT_LOSS''BATCH_IMAGES',
: : 'VAL_IMAGE_SET''WITH_MVRC_LOSS'False32: ,
: ,
False'WITH_MLM_LOSS',
'FLIP_PROB': : 'WITH_REL_LOSS''val': 0True,
,
False,
'SHUFFLE'}'WITH_MVRC_LOSS': : ,
False'ZIP_MODE'False'NUM_WORKERS_PER_GPU': : ,
,
5'TEST_EPOCH''WITH_REL_LOSS',
: : False'OUTPUT_PATH'0False{: }},
},
'TRAIN': 'NUM_WORKERS_PER_GPU': 5,
'./checkpoints_debug2/./output/vl-bert/contrastive_random_images',
,
'OUTPUT_PATH': 'RNG_SEED': 'GPUS'12345: ,
'SCALES': './checkpoints_debug2/./output/vl-bert/contrastive_random_images''0',
,
'RNG_SEED'[600, 1000]: ,
'TEST'12345'LOG_FREQUENT': ,
: 'SCALES': 100[600, 1000],
,
'TEST'{: 'MODEL_PREFIX': 'BATCH_IMAGES': 32'vl-bert_base_res101_pretrain',
{,
'FLIP_PROB': 0'MODULE',
'BATCH_IMAGES': 'SHUFFLE': 'ANS_LOSS_WEIGHT': 32: {,
'ResNetVLBERTForPretraining'False1.0'FLIP_PROB',
,
: ,
'TEST_EPOCH': 0'BERT_ALIGN_ANSWER''NETWORK',
0: }True'SHUFFLE': ,
,
: 'TRAIN': False'BERT_ALIGN_QUESTION',
: 'TEST_EPOCH'True: ,
{0'BERT_FROZEN'}: ,
'ASPECT_GROUPING''TRAIN': False: False,
,
'BERT_MODEL_NAME''AUTO_RESUME': : './model/pretrained_model/bert-base-uncased'True,
,
'BERT_PRETRAINED''BATCH_IMAGES': : ''2,
,
'BEGIN_EPOCH''BERT_PRETRAINED_EPOCH': : 00,
,
'CLIP_GRAD_NORM': 'BERT_USE_LAYER': 10,
-2'END_EPOCH',
: 'BERT_WITH_MLM_LOSS'10: ,
{False'FLIP_PROB',
: 'BERT_WITH_NSP_LOSS'0.5: ,
'FP16'False'ANS_LOSS_WEIGHT',
: : 'BLIND'False: {1.0,
False,
'FP16_LOSS_SCALE',
'BERT_ALIGN_ANSWER': 'CLASSIFIER_DROPOUT': : 128.0True0.1,
,
,
'ASPECT_GROUPING''GRAD_ACCUMULATE_STEPS''BERT_ALIGN_QUESTION': 'CLASSIFIER_HIDDEN_SIZE': False1: : ,
,
True'AUTO_RESUME'1024'LOSS_LOGGERS': ,
: ,
True'CLASSIFIER_SIGMOID''BERT_FROZEN',
: : 'ASPECT_GROUPING''BATCH_IMAGES'False: : False[,
False2,
,
,
'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT''BERT_MODEL_NAME': 'AUTO_RESUME': 'BEGIN_EPOCH'('mlm_loss', 'MLMLossWVC')1.0: : './model/pretrained_model/bert-base-uncased',
,
0True,
'CLASSIFIER_TYPE',
,
'BERT_PRETRAINED'('mvrc_loss', 'MVRCLoss')]: 'CLIP_GRAD_NORM''BATCH_IMAGES': ,
: : '2fc''''LR'210,
: ,
,
,
'CNN_LOSS_WEIGHT''BEGIN_EPOCH': 'END_EPOCH'1e-07'BERT_PRETRAINED_EPOCH',
: : 1.0: 'LR_FACTOR'0,
100: 'ENABLE_CNN_REG_LOSS',
,
: 0.1,
'CLIP_GRAD_NORM''FLIP_PROB': ,
False'BERT_USE_LAYER'10: : 'LR_MULT',
0.5: ,
-2,
'FOR_MASK_VL_MODELING_PRETRAIN''END_EPOCH'[],
: 'FP16''BERT_WITH_MLM_LOSS': 10: ,
: ,
FalseFalseFalse'LR_SCHEDULE',
: ,
'FLIP_PROB',
'BERT_WITH_NSP_LOSS''triangle': : 'IMAGE_C5_DILATED''FP16_LOSS_SCALE'False,
0.5: 'LR_STEP': ,
,
: 128.0True'BLIND''FP16',
[],
: 'GRAD_ACCUMULATE_STEPS': ,
'IMAGE_FEAT_PRECOMPUTED': FalseFalse'MOMENTUM'1: : ,
,
,
False0.9,
'CLASSIFIER_DROPOUT',
'FP16_LOSS_SCALE''LOSS_LOGGERS': 'IMAGE_FINAL_DIM': 'OPTIMIZER': : 0.1: 128.0768,
,
'AdamW',
'CLASSIFIER_HIDDEN_SIZE''GRAD_ACCUMULATE_STEPS',
: 'IMAGE_FROZEN_BACKBONE_STAGES''RESUME': : : [11024,
False[1, 2],
'LOSS_LOGGERS',
,
: 'CLASSIFIER_SIGMOID''SHUFFLE'('mlm_loss', 'MLMLossWVC')'IMAGE_FROZEN_BN': : : ,
TrueTrueFalse,
,
[,
'WARMUP'('mvrc_loss', 'MVRCLoss')'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT': 'IMAGE_NUM_LAYERS']: : True('mlm_loss', 'MLMLossWVC'),
1011.0,
,
,
'LR',
'WARMUP_FACTOR': 'CLASSIFIER_TYPE'('mvrc_loss', 'MVRCLoss'): : 'IMAGE_PRETRAINED'1e-07]'2fc': ,
,
0.0,
'''LR',
'CNN_LOSS_WEIGHT',
: 'LR_FACTOR''WARMUP_METHOD': 'IMAGE_PRETRAINED_EPOCH': : 1e-07: 1.0,
'linear'0.1,
'LR_FACTOR'0'ENABLE_CNN_REG_LOSS',
,
: : ,
'WARMUP_STEPS'0.1'LR_MULT'False: : ,
'IMAGE_SEMANTIC',
8000[]'FOR_MASK_VL_MODELING_PRETRAIN''LR_MULT': ,
,
: : False'LR_SCHEDULE''WD'False,
[]: : ,
'IMAGE_STRIDE_IN_1x1''triangle',
0.0001,
'LR_SCHEDULE'}: 'IMAGE_C5_DILATED',
'LR_STEP': : : True'VAL': 'triangle'True,
[],
,
,
'MASK_RAW_PIXELS''IMAGE_FEAT_PRECOMPUTED''LR_STEP''MOMENTUM': : : : {'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False}False[]True,
0.9,
,
,
'VAL_FREQUENT''IMAGE_FINAL_DIM''MOMENTUM',
: 'MLM_LOSS_NORM_IN_BATCH_FIRST': : 'OPTIMIZER'1: : }False'AdamW'768,
0.9
,
,
,
'MVRC_LOSS_NORM_IN_BATCH_FIRST''RESUME''IMAGE_FROZEN_BACKBONE_STAGES': 'OPTIMIZER': : : FalseFalse,
,
'AdamW'[1, 2]'NO_GROUNDING',
'SHUFFLE',
: : 'RESUME''IMAGE_FROZEN_BN'False: True: ,
,
FalseTrue'WARMUP''NO_OBJ_ATTENTION',
: ,
: 'SHUFFLE': FalseTrue'IMAGE_NUM_LAYERS',
True: ,
,
'OUTPUT_CONV5'101: 'WARMUP_FACTOR',
'WARMUP': False: 'IMAGE_PRETRAINED',
0.0True: 'PARTIAL_PRETRAIN',
,
: '''WARMUP_METHOD''WARMUP_FACTOR',
: : 'IMAGE_PRETRAINED_EPOCH''linear'0.0: ,
,
0'WARMUP_STEPS''WARMUP_METHOD',
: : 'IMAGE_SEMANTIC'8000'linear': ,
,
False'WARMUP_STEPS''WD',
: : {'IMAGE_STRIDE_IN_1x1'0.00018000: },
,
'WD'True: 'VAL',
: 0.0001'MASK_RAW_PIXELS'}: ,
True'VAL',
: 'MLM_LOSS_NORM_IN_BATCH_FIRST'{'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False}: ,
'VAL_FREQUENT'False: ,
{'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False}1'MVRC_LOSS_NORM_IN_BATCH_FIRST',
}: 'VAL_FREQUENT'
False: ,
1'NO_GROUNDING'}:
False,
'NO_OBJ_ATTENTION': False,
'OUTPUT_CONV5': False,
'PARTIAL_PRETRAIN': '/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model',
'PARTIAL_PRETRAIN_PREFIX_CHANGES': [],
'PIXEL_MEANS': [102.9801, 115.9465, 122.7717],
'PIXEL_STDS': [1.0, 1.0, 1.0],
'VLBERT': '/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model',
'PARTIAL_PRETRAIN_PREFIX_CHANGES': {[],
'PIXEL_MEANS': 'ANS_LOSS_WEIGHT': [102.9801, 115.9465, 122.7717],
'PIXEL_STDS': 1.0,
[1.0, 1.0, 1.0],
'VLBERT': 'BERT_ALIGN_ANSWER': 'attention_probs_dropout_prob': True0.1,
,
'from_scratch': 'BERT_ALIGN_QUESTION'False: ,
'hidden_act': True'gelu',
,
'hidden_dropout_prob': 'BERT_FROZEN'0.1: ,
'hidden_size': False768,
,
'initializer_range': 'BERT_MODEL_NAME'0.02,
: 'input_size': 1280'./model/pretrained_model/bert-base-uncased'{,
,
'input_transform_type': 1'BERT_PRETRAINED',
: 'intermediate_size': 3072,
'''max_position_embeddings': ,
512,
'BERT_PRETRAINED_EPOCH''num_attention_heads': : 12,
'num_hidden_layers'0: ,
12'attention_probs_dropout_prob',
: 'obj_pos_id_relative''BERT_USE_LAYER'0.1: ,
: True'from_scratch',
: 'object_word_embed_mode'-2False: ,
,
2'hidden_act',
: 'gelu',
'pos_embedding_frozen''hidden_dropout_prob': : 'BERT_WITH_MLM_LOSS'False0.1,
,
: 'hidden_size''position_padding_idx': : False-1768,
,
,
'type_vocab_size''initializer_range': : 'BERT_WITH_NSP_LOSS'0.023,
,
: 'input_size''visual_ln': : False1280True,
,
,
'input_transform_type''visual_region_classes': : 'BLIND'181,
,
: 'intermediate_size''visual_scale_object_init': : False0.03072,
,
,
'visual_scale_text_init''max_position_embeddings': : 'CLASSIFIER_DROPOUT'0.0512,
: ,
'visual_size': 'num_attention_heads': 768,
0.112'vocab_size',
: ,
'num_hidden_layers'30522: ,
'CLASSIFIER_HIDDEN_SIZE''with_pooler'12: : ,
False,
'obj_pos_id_relative': 'word_embedding_frozen'1024: True,
,
False'object_word_embed_mode': }2,
'CLASSIFIER_SIGMOID',
'WITH_CONT_LOSS': 'pos_embedding_frozen': : FalseFalseFalse,
,
'WITH_MLM_LOSS',
'position_padding_idx': : True-1,
'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT''WITH_MVRC_LOSS',
: : 'type_vocab_size'False: ,
3'WITH_REL_LOSS',
1.0: 'visual_ln',
: False}True,
'CLASSIFIER_TYPE',
'visual_region_classes': : 'NUM_WORKERS_PER_GPU'81: ,
5'visual_scale_object_init''2fc',
: ,
0.0'OUTPUT_PATH',
: 'visual_scale_text_init''CNN_LOSS_WEIGHT': : 0.0,
'./checkpoints_debug2/./output/vl-bert/contrastive_random_images'1.0'visual_size',
: ,
'RNG_SEED'768: ,
12345'ENABLE_CNN_REG_LOSS''vocab_size',
: : 'SCALES'30522: ,
'with_pooler': False[600, 1000]False,
,
,
'TEST''word_embedding_frozen': : 'FOR_MASK_VL_MODELING_PRETRAIN'False: },
False'WITH_CONT_LOSS': {,
False,
'IMAGE_C5_DILATED''WITH_MLM_LOSS''BATCH_IMAGES': : : True32,
,
True'WITH_MVRC_LOSS''FLIP_PROB': : ,
False0,
,
'IMAGE_FEAT_PRECOMPUTED''WITH_REL_LOSS''SHUFFLE': : : FalseFalse,
False}'TEST_EPOCH',
: ,
0'NUM_WORKERS_PER_GPU': }'IMAGE_FINAL_DIM',
5: ,
'TRAIN': 'OUTPUT_PATH': 768,
'IMAGE_FROZEN_BACKBONE_STAGES''./checkpoints_debug2/./output/vl-bert/contrastive_random_images': ,
'RNG_SEED': 12345,
'SCALES': [1, 2],
[600, 1000],
'IMAGE_FROZEN_BN''TEST': : True,
'IMAGE_NUM_LAYERS'{: 'BATCH_IMAGES'101: ,
32,
'IMAGE_PRETRAINED''FLIP_PROB': : 0,
'SHUFFLE'{'': ,
False,
'TEST_EPOCH''IMAGE_PRETRAINED_EPOCH': : 0},
0'TRAIN': ,
'IMAGE_SEMANTIC': False,
'ASPECT_GROUPING': 'IMAGE_STRIDE_IN_1x1'False: ,
'AUTO_RESUME'True: ,
True,
'BATCH_IMAGES''MASK_RAW_PIXELS': : 2,
'BEGIN_EPOCH': True0,
,
'CLIP_GRAD_NORM': 'MLM_LOSS_NORM_IN_BATCH_FIRST'10: ,
'END_EPOCH': False10,
,
'FLIP_PROB'{: 'MVRC_LOSS_NORM_IN_BATCH_FIRST'0.5,
: 'FP16': FalseFalse,
,
'FP16_LOSS_SCALE': 128.0'NO_GROUNDING',
: 'GRAD_ACCUMULATE_STEPS': 1False,
,
'LOSS_LOGGERS': 'NO_OBJ_ATTENTION''ASPECT_GROUPING': : False,
['AUTO_RESUME'False: ,
True('mlm_loss', 'MLMLossWVC'),
'OUTPUT_CONV5''BATCH_IMAGES',
: : 2('mvrc_loss', 'MVRCLoss'),
]False'BEGIN_EPOCH',
: 'LR',
0: ,
'CLIP_GRAD_NORM'1e-07'PARTIAL_PRETRAIN': ,
: 10'LR_FACTOR',
: 'END_EPOCH'0.1: ,
10'LR_MULT',
: 'FLIP_PROB': [],
0.5'LR_SCHEDULE',
: 'FP16''triangle': ,
False'LR_STEP',
: 'FP16_LOSS_SCALE'[]: ,
128.0'MOMENTUM',
: 'GRAD_ACCUMULATE_STEPS'0.9: ,
1'OPTIMIZER',
: 'LOSS_LOGGERS''AdamW': ,
'RESUME': False,
'SHUFFLE': [True,
'WARMUP': True('mlm_loss', 'MLMLossWVC'),
,
'WARMUP_FACTOR': 0.0('mvrc_loss', 'MVRCLoss'),
]'WARMUP_METHOD',
: 'LR''linear': ,
'WARMUP_STEPS'1e-07: ,
8000'LR_FACTOR',
: 'WD': 0.1,
0.0001'LR_MULT'}: ,
[]'VAL',
: 'LR_SCHEDULE': 'triangle',
'LR_STEP': {'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False}[],
,
'VAL_FREQUENT''MOMENTUM': : 10.9},
'OPTIMIZER'
: 'AdamW',
'RESUME': False,
'SHUFFLE': True,
'WARMUP': True,
'WARMUP_FACTOR': 0.0,
'WARMUP_METHOD': 'linear',
'WARMUP_STEPS': 8000,
'WD': 0.0001},
'VAL': {'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False},
'VAL_FREQUENT': 1}
'/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model',
'PARTIAL_PRETRAIN_PREFIX_CHANGES': [],
'PIXEL_MEANS': [102.9801, 115.9465, 122.7717],
'PIXEL_STDS': [1.0, 1.0, 1.0],
'VLBERT': {'attention_probs_dropout_prob': 0.1,
'from_scratch': False,
'hidden_act': 'gelu',
'hidden_dropout_prob': 0.1,
'hidden_size': 768,
'initializer_range': 0.02,
'input_size': 1280,
'input_transform_type': 1,
'intermediate_size': 3072,
'max_position_embeddings': 512,
'num_attention_heads': 12,
'num_hidden_layers': 12,
'obj_pos_id_relative': True,
'object_word_embed_mode': 2,
'pos_embedding_frozen': False,
'position_padding_idx': -1,
'type_vocab_size': 3,
'visual_ln': True,
'visual_region_classes': 81,
'visual_scale_object_init': 0.0,
'visual_scale_text_init': 0.0,
'visual_size': 768,
'vocab_size': 30522,
'with_pooler': False,
'word_embedding_frozen': False},
'WITH_CONT_LOSS': False,
'WITH_MLM_LOSS': True,
'WITH_MVRC_LOSS': False,
'WITH_REL_LOSS': False},
'NUM_WORKERS_PER_GPU': 5,
'OUTPUT_PATH': './checkpoints_debug2/./output/vl-bert/contrastive_random_images',
'RNG_SEED': 12345,
'SCALES': [600, 1000],
'TEST': {'BATCH_IMAGES': 32,
'FLIP_PROB': 0,
'SHUFFLE': False,
'TEST_EPOCH': 0},
'TRAIN': {'ASPECT_GROUPING': False,
'AUTO_RESUME': True,
'BATCH_IMAGES': 2,
'BEGIN_EPOCH': 0,
'CLIP_GRAD_NORM': 10,
'END_EPOCH': 10,
'FLIP_PROB': 0.5,
'FP16': False,
'FP16_LOSS_SCALE': 128.0,
'GRAD_ACCUMULATE_STEPS': 1,
'LOSS_LOGGERS': [('mlm_loss', 'MLMLossWVC'),
('mvrc_loss', 'MVRCLoss')],
'LR': 1e-07,
'LR_FACTOR': 0.1,
'LR_MULT': [],
'LR_SCHEDULE': 'triangle',
'LR_STEP': [],
'MOMENTUM': 0.9,
'OPTIMIZER': 'AdamW',
'RESUME': False,
'SHUFFLE': True,
'WARMUP': True,
'WARMUP_FACTOR': 0.0,
'WARMUP_METHOD': 'linear',
'WARMUP_STEPS': 8000,
'WD': 0.0001},
'VAL': {'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False},
'VAL_FREQUENT': 1}
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
NCCL version 2.7.8+cuda10.2
native distributed, size: 6, rank: 5, local rank: 5native distributed, size: 6, rank: 2, local rank: 2
native distributed, size: 6, rank: 4, local rank: 4
native distributed, size: 6, rank: 3, local rank: 3native distributed, size: 6, rank: 1, local rank: 1native distributed, size: 6, rank: 0, local rank: 0
>> Trainable Parameters:
------------------------------------------------------------------------------------------------------------------------------------
|Name |Dtype |Shape |#Params |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.conv1.weight |torch.float32 |(128, 256, 1, 1) |32768 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.downsample.0.weight |torch.float32 |(512, 256, 1, 1) |131072 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.1.conv1.weight |torch.float32 |(128, 512, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.1.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.1.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.2.conv1.weight |torch.float32 |(128, 512, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.2.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.2.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.3.conv1.weight |torch.float32 |(128, 512, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.3.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.3.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.conv1.weight |torch.float32 |(256, 512, 1, 1) |131072 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.downsample.0.weight |torch.float32 |(1024, 512, 1, 1) |524288 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.1.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.1.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.1.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.2.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.2.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.2.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.3.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.3.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.3.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.4.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.4.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.4.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.5.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.5.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.5.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.6.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.6.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.6.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.7.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.7.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.7.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.8.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.8.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.8.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.9.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.9.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.9.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.10.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.10.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.10.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.11.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.11.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.11.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.12.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.12.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.12.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.13.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.13.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.13.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.14.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.14.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.14.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.15.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.15.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.15.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.16.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.16.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.16.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.17.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.17.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.17.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.18.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.18.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.18.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.19.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.19.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.19.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.20.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.20.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.20.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.21.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.21.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.21.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.22.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.22.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.22.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.0.conv1.weight |torch.float32 |(512, 1024, 1, 1) |524288 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.0.conv2.weight |torch.float32 |(512, 512, 3, 3) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.0.conv3.weight |torch.float32 |(2048, 512, 1, 1) |1048576 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight |torch.float32 |(2048, 1024, 1, 1) |2097152 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.1.conv1.weight |torch.float32 |(512, 2048, 1, 1) |1048576 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.1.conv2.weight |torch.float32 |(512, 512, 3, 3) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.1.conv3.weight |torch.float32 |(2048, 512, 1, 1) |1048576 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.2.conv1.weight |torch.float32 |(512, 2048, 1, 1) |1048576 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.2.conv2.weight |torch.float32 |(512, 512, 3, 3) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.roi_head_feature_extractor.2.conv3.weight |torch.float32 |(2048, 512, 1, 1) |1048576 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.obj_downsample.1.weight |torch.float32 |(768, 4096) |3145728 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.obj_downsample.1.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|object_linguistic_embeddings.weight |torch.float32 |(1, 768) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.word_embeddings.weight |torch.float32 |(30522, 768) |23440896 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.end_embedding.weight |torch.float32 |(1, 768) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.position_embeddings.weight |torch.float32 |(512, 768) |393216 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.token_type_embeddings.weight |torch.float32 |(3, 768) |2304 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.embedding_LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.embedding_LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.visual_ln_text.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.visual_ln_text.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.visual_ln_object.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.visual_ln_object.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.0.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.1.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.2.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.3.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.4.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.5.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.6.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.7.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.8.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.9.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.10.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.self.query.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.self.query.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.self.key.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.self.key.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.self.value.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.self.value.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.output.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.attention.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.intermediate.dense.weight |torch.float32 |(3072, 768) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.intermediate.dense.bias |torch.float32 |(3072,) |3072 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.output.dense.weight |torch.float32 |(768, 3072) |2359296 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.output.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.output.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.encoder.layer.11.output.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.mlm_head.predictions.bias |torch.float32 |(30522,) |30522 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.mlm_head.predictions.transform.dense.weight |torch.float32 |(768, 768) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.mlm_head.predictions.transform.dense.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.mlm_head.predictions.transform.LayerNorm.weight |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
|vlbert.mlm_head.predictions.transform.LayerNorm.bias |torch.float32 |(768,) |768 |
------------------------------------------------------------------------------------------------------------------------------------
cached database found in ./cache/db_movienet_shotframes_train.pkl.
loading cached database from ./cache/db_movienet_shotframes_train.pkl...
>> # TrainableParams: 154.84 M
>> # NonTrainableParams: 0.33 M
>> # TotalParams: 155.17 M
cached database found in ./cache/db_movienet_shotframes_train.pkl.
loading cached database from ./cache/db_movienet_shotframes_train.pkl...
cached database found in ./cache/db_movienet_shotframes_train.pkl.
loading cached database from ./cache/db_movienet_shotframes_train.pkl...
cached database found in ./cache/db_movienet_shotframes_train.pkl.
loading cached database from ./cache/db_movienet_shotframes_train.pkl...
cached database found in ./cache/db_movienet_shotframes_train.pkl.
loading cached database from ./cache/db_movienet_shotframes_train.pkl...
cached database found in ./cache/db_movienet_shotframes_train.pkl.
loading cached database from ./cache/db_movienet_shotframes_train.pkl...
Done (t=2.03s)
Done (t=2.05s)
Done (t=2.06s)
cached database found in ./cache/db_movienet_shotframes_val.pkl.
loading cached database from ./cache/db_movienet_shotframes_val.pkl...
cached database found in ./cache/db_movienet_shotframes_val.pkl.
loading cached database from ./cache/db_movienet_shotframes_val.pkl...
cached database found in ./cache/db_movienet_shotframes_val.pkl.
loading cached database from ./cache/db_movienet_shotframes_val.pkl...
Done (t=2.12s)
cached database found in ./cache/db_movienet_shotframes_val.pkl.
loading cached database from ./cache/db_movienet_shotframes_val.pkl...
Done (t=2.20s)
cached database found in ./cache/db_movienet_shotframes_val.pkl.
loading cached database from ./cache/db_movienet_shotframes_val.pkl...
Done (t=2.29s)
cached database found in ./cache/db_movienet_shotframes_val.pkl.
loading cached database from ./cache/db_movienet_shotframes_val.pkl...
Done (t=0.39s)
Done (t=0.39s)
Done (t=0.43s)
Done (t=0.36s)
Done (t=0.37s)
Done (t=0.40s)
[Partial Load] partial load state dict of keys: dict_keys(['module.image_feature_extractor.backbone.conv1.weight', 'module.image_feature_extractor.backbone.bn1.weight', 'module.image_feature_extractor.backbone.bn1.bias', 'module.image_feature_extractor.backbone.bn1.running_mean', 'module.image_feature_extractor.backbone.bn1.running_var', 'module.image_feature_extractor.backbone.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.bias', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.bias', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.bias', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.bias', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.bias', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.bias', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.bias', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.bias', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.bias', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.bias', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.bias', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.bias', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.bias', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.bias', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.bias', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.bias', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.bias', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.bias', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.bias', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.bias', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.bias', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.bias', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.bias', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.bias', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.bias', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.bias', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.bias', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.bias', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.bias', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.bias', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.bias', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.bias', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.bias', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.bias', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.bias', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.bias', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.bias', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.bias', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.bias', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.bias', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.bias', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.bias', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.bias', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.bias', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.bias', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.bias', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.bias', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.bias', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.bias', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.bias', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.bias', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.bias', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.bias', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.bias', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.bias', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.bias', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.bias', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.bias', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.bias', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.bias', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.bias', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.bias', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.bias', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.bias', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.bias', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.bias', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.bias', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.bias', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.bias', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.bias', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.bias', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.bias', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.bias', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.bias', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.bias', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.bias', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.bias', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.bias', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.bias', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.bias', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.bias', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.bias', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.bias', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.bias', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.bias', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.bias', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.bias', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.bias', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.bias', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.bias', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv1.weight', 'module.image_feature_extractor.head.0.0.bn1.weight', 'module.image_feature_extractor.head.0.0.bn1.bias', 'module.image_feature_extractor.head.0.0.bn1.running_mean', 'module.image_feature_extractor.head.0.0.bn1.running_var', 'module.image_feature_extractor.head.0.0.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv2.weight', 'module.image_feature_extractor.head.0.0.bn2.weight', 'module.image_feature_extractor.head.0.0.bn2.bias', 'module.image_feature_extractor.head.0.0.bn2.running_mean', 'module.image_feature_extractor.head.0.0.bn2.running_var', 'module.image_feature_extractor.head.0.0.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv3.weight', 'module.image_feature_extractor.head.0.0.bn3.weight', 'module.image_feature_extractor.head.0.0.bn3.bias', 'module.image_feature_extractor.head.0.0.bn3.running_mean', 'module.image_feature_extractor.head.0.0.bn3.running_var', 'module.image_feature_extractor.head.0.0.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.downsample.0.weight', 'module.image_feature_extractor.head.0.0.downsample.1.weight', 'module.image_feature_extractor.head.0.0.downsample.1.bias', 'module.image_feature_extractor.head.0.0.downsample.1.running_mean', 'module.image_feature_extractor.head.0.0.downsample.1.running_var', 'module.image_feature_extractor.head.0.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv1.weight', 'module.image_feature_extractor.head.0.1.bn1.weight', 'module.image_feature_extractor.head.0.1.bn1.bias', 'module.image_feature_extractor.head.0.1.bn1.running_mean', 'module.image_feature_extractor.head.0.1.bn1.running_var', 'module.image_feature_extractor.head.0.1.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv2.weight', 'module.image_feature_extractor.head.0.1.bn2.weight', 'module.image_feature_extractor.head.0.1.bn2.bias', 'module.image_feature_extractor.head.0.1.bn2.running_mean', 'module.image_feature_extractor.head.0.1.bn2.running_var', 'module.image_feature_extractor.head.0.1.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv3.weight', 'module.image_feature_extractor.head.0.1.bn3.weight', 'module.image_feature_extractor.head.0.1.bn3.bias', 'module.image_feature_extractor.head.0.1.bn3.running_mean', 'module.image_feature_extractor.head.0.1.bn3.running_var', 'module.image_feature_extractor.head.0.1.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv1.weight', 'module.image_feature_extractor.head.0.2.bn1.weight', 'module.image_feature_extractor.head.0.2.bn1.bias', 'module.image_feature_extractor.head.0.2.bn1.running_mean', 'module.image_feature_extractor.head.0.2.bn1.running_var', 'module.image_feature_extractor.head.0.2.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv2.weight', 'module.image_feature_extractor.head.0.2.bn2.weight', 'module.image_feature_extractor.head.0.2.bn2.bias', 'module.image_feature_extractor.head.0.2.bn2.running_mean', 'module.image_feature_extractor.head.0.2.bn2.running_var', 'module.image_feature_extractor.head.0.2.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv3.weight', 'module.image_feature_extractor.head.0.2.bn3.weight', 'module.image_feature_extractor.head.0.2.bn3.bias', 'module.image_feature_extractor.head.0.2.bn3.running_mean', 'module.image_feature_extractor.head.0.2.bn3.running_var', 'module.image_feature_extractor.head.0.2.bn3.num_batches_tracked', 'module.image_feature_extractor.obj_downsample.1.weight', 'module.image_feature_extractor.obj_downsample.1.bias', 'module.object_linguistic_embeddings.weight', 'module.vlbert.word_embeddings.weight', 'module.vlbert.end_embedding.weight', 'module.vlbert.position_embeddings.weight', 'module.vlbert.token_type_embeddings.weight', 'module.vlbert.embedding_LayerNorm.weight', 'module.vlbert.embedding_LayerNorm.bias', 'module.vlbert.visual_ln_text.weight', 'module.vlbert.visual_ln_text.bias', 'module.vlbert.visual_ln_object.weight', 'module.vlbert.visual_ln_object.bias', 'module.vlbert.encoder.layer.0.attention.self.query.weight', 'module.vlbert.encoder.layer.0.attention.self.query.bias', 'module.vlbert.encoder.layer.0.attention.self.key.weight', 'module.vlbert.encoder.layer.0.attention.self.key.bias', 'module.vlbert.encoder.layer.0.attention.self.value.weight', 'module.vlbert.encoder.layer.0.attention.self.value.bias', 'module.vlbert.encoder.layer.0.attention.output.dense.weight', 'module.vlbert.encoder.layer.0.attention.output.dense.bias', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.0.intermediate.dense.weight', 'module.vlbert.encoder.layer.0.intermediate.dense.bias', 'module.vlbert.encoder.layer.0.output.dense.weight', 'module.vlbert.encoder.layer.0.output.dense.bias', 'module.vlbert.encoder.layer.0.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.attention.self.query.weight', 'module.vlbert.encoder.layer.1.attention.self.query.bias', 'module.vlbert.encoder.layer.1.attention.self.key.weight', 'module.vlbert.encoder.layer.1.attention.self.key.bias', 'module.vlbert.encoder.layer.1.attention.self.value.weight', 'module.vlbert.encoder.layer.1.attention.self.value.bias', 'module.vlbert.encoder.layer.1.attention.output.dense.weight', 'module.vlbert.encoder.layer.1.attention.output.dense.bias', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.intermediate.dense.weight', 'module.vlbert.encoder.layer.1.intermediate.dense.bias', 'module.vlbert.encoder.layer.1.output.dense.weight', 'module.vlbert.encoder.layer.1.output.dense.bias', 'module.vlbert.encoder.layer.1.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.attention.self.query.weight', 'module.vlbert.encoder.layer.2.attention.self.query.bias', 'module.vlbert.encoder.layer.2.attention.self.key.weight', 'module.vlbert.encoder.layer.2.attention.self.key.bias', 'module.vlbert.encoder.layer.2.attention.self.value.weight', 'module.vlbert.encoder.layer.2.attention.self.value.bias', 'module.vlbert.encoder.layer.2.attention.output.dense.weight', 'module.vlbert.encoder.layer.2.attention.output.dense.bias', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.intermediate.dense.weight', 'module.vlbert.encoder.layer.2.intermediate.dense.bias', 'module.vlbert.encoder.layer.2.output.dense.weight', 'module.vlbert.encoder.layer.2.output.dense.bias', 'module.vlbert.encoder.layer.2.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.attention.self.query.weight', 'module.vlbert.encoder.layer.3.attention.self.query.bias', 'module.vlbert.encoder.layer.3.attention.self.key.weight', 'module.vlbert.encoder.layer.3.attention.self.key.bias', 'module.vlbert.encoder.layer.3.attention.self.value.weight', 'module.vlbert.encoder.layer.3.attention.self.value.bias', 'module.vlbert.encoder.layer.3.attention.output.dense.weight', 'module.vlbert.encoder.layer.3.attention.output.dense.bias', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.intermediate.dense.weight', 'module.vlbert.encoder.layer.3.intermediate.dense.bias', 'module.vlbert.encoder.layer.3.output.dense.weight', 'module.vlbert.encoder.layer.3.output.dense.bias', 'module.vlbert.encoder.layer.3.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.attention.self.query.weight', 'module.vlbert.encoder.layer.4.attention.self.query.bias', 'module.vlbert.encoder.layer.4.attention.self.key.weight', 'module.vlbert.encoder.layer.4.attention.self.key.bias', 'module.vlbert.encoder.layer.4.attention.self.value.weight', 'module.vlbert.encoder.layer.4.attention.self.value.bias', 'module.vlbert.encoder.layer.4.attention.output.dense.weight', 'module.vlbert.encoder.layer.4.attention.output.dense.bias', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.intermediate.dense.weight', 'module.vlbert.encoder.layer.4.intermediate.dense.bias', 'module.vlbert.encoder.layer.4.output.dense.weight', 'module.vlbert.encoder.layer.4.output.dense.bias', 'module.vlbert.encoder.layer.4.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.attention.self.query.weight', 'module.vlbert.encoder.layer.5.attention.self.query.bias', 'module.vlbert.encoder.layer.5.attention.self.key.weight', 'module.vlbert.encoder.layer.5.attention.self.key.bias', 'module.vlbert.encoder.layer.5.attention.self.value.weight', 'module.vlbert.encoder.layer.5.attention.self.value.bias', 'module.vlbert.encoder.layer.5.attention.output.dense.weight', 'module.vlbert.encoder.layer.5.attention.output.dense.bias', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.intermediate.dense.weight', 'module.vlbert.encoder.layer.5.intermediate.dense.bias', 'module.vlbert.encoder.layer.5.output.dense.weight', 'module.vlbert.encoder.layer.5.output.dense.bias', 'module.vlbert.encoder.layer.5.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.attention.self.query.weight', 'module.vlbert.encoder.layer.6.attention.self.query.bias', 'module.vlbert.encoder.layer.6.attention.self.key.weight', 'module.vlbert.encoder.layer.6.attention.self.key.bias', 'module.vlbert.encoder.layer.6.attention.self.value.weight', 'module.vlbert.encoder.layer.6.attention.self.value.bias', 'module.vlbert.encoder.layer.6.attention.output.dense.weight', 'module.vlbert.encoder.layer.6.attention.output.dense.bias', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.intermediate.dense.weight', 'module.vlbert.encoder.layer.6.intermediate.dense.bias', 'module.vlbert.encoder.layer.6.output.dense.weight', 'module.vlbert.encoder.layer.6.output.dense.bias', 'module.vlbert.encoder.layer.6.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.attention.self.query.weight', 'module.vlbert.encoder.layer.7.attention.self.query.bias', 'module.vlbert.encoder.layer.7.attention.self.key.weight', 'module.vlbert.encoder.layer.7.attention.self.key.bias', 'module.vlbert.encoder.layer.7.attention.self.value.weight', 'module.vlbert.encoder.layer.7.attention.self.value.bias', 'module.vlbert.encoder.layer.7.attention.output.dense.weight', 'module.vlbert.encoder.layer.7.attention.output.dense.bias', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.intermediate.dense.weight', 'module.vlbert.encoder.layer.7.intermediate.dense.bias', 'module.vlbert.encoder.layer.7.output.dense.weight', 'module.vlbert.encoder.layer.7.output.dense.bias', 'module.vlbert.encoder.layer.7.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.attention.self.query.weight', 'module.vlbert.encoder.layer.8.attention.self.query.bias', 'module.vlbert.encoder.layer.8.attention.self.key.weight', 'module.vlbert.encoder.layer.8.attention.self.key.bias', 'module.vlbert.encoder.layer.8.attention.self.value.weight', 'module.vlbert.encoder.layer.8.attention.self.value.bias', 'module.vlbert.encoder.layer.8.attention.output.dense.weight', 'module.vlbert.encoder.layer.8.attention.output.dense.bias', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.intermediate.dense.weight', 'module.vlbert.encoder.layer.8.intermediate.dense.bias', 'module.vlbert.encoder.layer.8.output.dense.weight', 'module.vlbert.encoder.layer.8.output.dense.bias', 'module.vlbert.encoder.layer.8.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.attention.self.query.weight', 'module.vlbert.encoder.layer.9.attention.self.query.bias', 'module.vlbert.encoder.layer.9.attention.self.key.weight', 'module.vlbert.encoder.layer.9.attention.self.key.bias', 'module.vlbert.encoder.layer.9.attention.self.value.weight', 'module.vlbert.encoder.layer.9.attention.self.value.bias', 'module.vlbert.encoder.layer.9.attention.output.dense.weight', 'module.vlbert.encoder.layer.9.attention.output.dense.bias', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.intermediate.dense.weight', 'module.vlbert.encoder.layer.9.intermediate.dense.bias', 'module.vlbert.encoder.layer.9.output.dense.weight', 'module.vlbert.encoder.layer.9.output.dense.bias', 'module.vlbert.encoder.layer.9.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.attention.self.query.weight', 'module.vlbert.encoder.layer.10.attention.self.query.bias', 'module.vlbert.encoder.layer.10.attention.self.key.weight', 'module.vlbert.encoder.layer.10.attention.self.key.bias', 'module.vlbert.encoder.layer.10.attention.self.value.weight', 'module.vlbert.encoder.layer.10.attention.self.value.bias', 'module.vlbert.encoder.layer.10.attention.output.dense.weight', 'module.vlbert.encoder.layer.10.attention.output.dense.bias', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.intermediate.dense.weight', 'module.vlbert.encoder.layer.10.intermediate.dense.bias', 'module.vlbert.encoder.layer.10.output.dense.weight', 'module.vlbert.encoder.layer.10.output.dense.bias', 'module.vlbert.encoder.layer.10.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.attention.self.query.weight', 'module.vlbert.encoder.layer.11.attention.self.query.bias', 'module.vlbert.encoder.layer.11.attention.self.key.weight', 'module.vlbert.encoder.layer.11.attention.self.key.bias', 'module.vlbert.encoder.layer.11.attention.self.value.weight', 'module.vlbert.encoder.layer.11.attention.self.value.bias', 'module.vlbert.encoder.layer.11.attention.output.dense.weight', 'module.vlbert.encoder.layer.11.attention.output.dense.bias', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.intermediate.dense.weight', 'module.vlbert.encoder.layer.11.intermediate.dense.bias', 'module.vlbert.encoder.layer.11.output.dense.weight', 'module.vlbert.encoder.layer.11.output.dense.bias', 'module.vlbert.encoder.layer.11.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.output.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.bias', 'module.vlbert.mlm_head.predictions.transform.dense.weight', 'module.vlbert.mlm_head.predictions.transform.dense.bias', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.weight', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.decoder.weight'])
[Partial Load] non matched keys: ['object_mask_word_embedding.weight', 'aux_text_visual_embedding.weight', 'vlbert.mvrc_head.transform.dense.weight', 'vlbert.mvrc_head.transform.dense.bias', 'vlbert.mvrc_head.region_cls_pred.weight', 'vlbert.mvrc_head.region_cls_pred.bias']
[Partial Load] non pretrain keys: []
[Partial Load] partial load state dict of keys: dict_keys(['module.image_feature_extractor.backbone.conv1.weight', 'module.image_feature_extractor.backbone.bn1.weight', 'module.image_feature_extractor.backbone.bn1.bias', 'module.image_feature_extractor.backbone.bn1.running_mean', 'module.image_feature_extractor.backbone.bn1.running_var', 'module.image_feature_extractor.backbone.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.bias', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.bias', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.bias', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.bias', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.bias', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.bias', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.bias', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.bias', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.bias', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.bias', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.bias', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.bias', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.bias', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.bias', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.bias', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.bias', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.bias', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.bias', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.bias', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.bias', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.bias', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.bias', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.bias', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.bias', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.bias', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.bias', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.bias', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.bias', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.bias', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.bias', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.bias', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.bias', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.bias', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.bias', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.bias', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.bias', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.bias', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.bias', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.bias', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.bias', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.bias', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.bias', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.bias', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.bias', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.bias', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.bias', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.bias', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.bias', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.bias', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.bias', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.bias', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.bias', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.bias', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.bias', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.bias', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.bias', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.bias', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.bias', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.bias', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.bias', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.bias', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.bias', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.bias', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.bias', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.bias', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.bias', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.bias', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.bias', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.bias', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.bias', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.bias', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.bias', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.bias', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.bias', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.bias', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.bias', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.bias', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.bias', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.bias', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.bias', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.bias', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.bias', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.bias', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.bias', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.bias', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.bias', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.bias', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.bias', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.bias', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.bias', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv1.weight', 'module.image_feature_extractor.head.0.0.bn1.weight', 'module.image_feature_extractor.head.0.0.bn1.bias', 'module.image_feature_extractor.head.0.0.bn1.running_mean', 'module.image_feature_extractor.head.0.0.bn1.running_var', 'module.image_feature_extractor.head.0.0.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv2.weight', 'module.image_feature_extractor.head.0.0.bn2.weight', 'module.image_feature_extractor.head.0.0.bn2.bias', 'module.image_feature_extractor.head.0.0.bn2.running_mean', 'module.image_feature_extractor.head.0.0.bn2.running_var', 'module.image_feature_extractor.head.0.0.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv3.weight', 'module.image_feature_extractor.head.0.0.bn3.weight', 'module.image_feature_extractor.head.0.0.bn3.bias', 'module.image_feature_extractor.head.0.0.bn3.running_mean', 'module.image_feature_extractor.head.0.0.bn3.running_var', 'module.image_feature_extractor.head.0.0.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.downsample.0.weight', 'module.image_feature_extractor.head.0.0.downsample.1.weight', 'module.image_feature_extractor.head.0.0.downsample.1.bias', 'module.image_feature_extractor.head.0.0.downsample.1.running_mean', 'module.image_feature_extractor.head.0.0.downsample.1.running_var', 'module.image_feature_extractor.head.0.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv1.weight', 'module.image_feature_extractor.head.0.1.bn1.weight', 'module.image_feature_extractor.head.0.1.bn1.bias', 'module.image_feature_extractor.head.0.1.bn1.running_mean', 'module.image_feature_extractor.head.0.1.bn1.running_var', 'module.image_feature_extractor.head.0.1.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv2.weight', 'module.image_feature_extractor.head.0.1.bn2.weight', 'module.image_feature_extractor.head.0.1.bn2.bias', 'module.image_feature_extractor.head.0.1.bn2.running_mean', 'module.image_feature_extractor.head.0.1.bn2.running_var', 'module.image_feature_extractor.head.0.1.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv3.weight', 'module.image_feature_extractor.head.0.1.bn3.weight', 'module.image_feature_extractor.head.0.1.bn3.bias', 'module.image_feature_extractor.head.0.1.bn3.running_mean', 'module.image_feature_extractor.head.0.1.bn3.running_var', 'module.image_feature_extractor.head.0.1.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv1.weight', 'module.image_feature_extractor.head.0.2.bn1.weight', 'module.image_feature_extractor.head.0.2.bn1.bias', 'module.image_feature_extractor.head.0.2.bn1.running_mean', 'module.image_feature_extractor.head.0.2.bn1.running_var', 'module.image_feature_extractor.head.0.2.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv2.weight', 'module.image_feature_extractor.head.0.2.bn2.weight', 'module.image_feature_extractor.head.0.2.bn2.bias', 'module.image_feature_extractor.head.0.2.bn2.running_mean', 'module.image_feature_extractor.head.0.2.bn2.running_var', 'module.image_feature_extractor.head.0.2.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv3.weight', 'module.image_feature_extractor.head.0.2.bn3.weight', 'module.image_feature_extractor.head.0.2.bn3.bias', 'module.image_feature_extractor.head.0.2.bn3.running_mean', 'module.image_feature_extractor.head.0.2.bn3.running_var', 'module.image_feature_extractor.head.0.2.bn3.num_batches_tracked', 'module.image_feature_extractor.obj_downsample.1.weight', 'module.image_feature_extractor.obj_downsample.1.bias', 'module.object_linguistic_embeddings.weight', 'module.vlbert.word_embeddings.weight', 'module.vlbert.end_embedding.weight', 'module.vlbert.position_embeddings.weight', 'module.vlbert.token_type_embeddings.weight', 'module.vlbert.embedding_LayerNorm.weight', 'module.vlbert.embedding_LayerNorm.bias', 'module.vlbert.visual_ln_text.weight', 'module.vlbert.visual_ln_text.bias', 'module.vlbert.visual_ln_object.weight', 'module.vlbert.visual_ln_object.bias', 'module.vlbert.encoder.layer.0.attention.self.query.weight', 'module.vlbert.encoder.layer.0.attention.self.query.bias', 'module.vlbert.encoder.layer.0.attention.self.key.weight', 'module.vlbert.encoder.layer.0.attention.self.key.bias', 'module.vlbert.encoder.layer.0.attention.self.value.weight', 'module.vlbert.encoder.layer.0.attention.self.value.bias', 'module.vlbert.encoder.layer.0.attention.output.dense.weight', 'module.vlbert.encoder.layer.0.attention.output.dense.bias', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.0.intermediate.dense.weight', 'module.vlbert.encoder.layer.0.intermediate.dense.bias', 'module.vlbert.encoder.layer.0.output.dense.weight', 'module.vlbert.encoder.layer.0.output.dense.bias', 'module.vlbert.encoder.layer.0.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.attention.self.query.weight', 'module.vlbert.encoder.layer.1.attention.self.query.bias', 'module.vlbert.encoder.layer.1.attention.self.key.weight', 'module.vlbert.encoder.layer.1.attention.self.key.bias', 'module.vlbert.encoder.layer.1.attention.self.value.weight', 'module.vlbert.encoder.layer.1.attention.self.value.bias', 'module.vlbert.encoder.layer.1.attention.output.dense.weight', 'module.vlbert.encoder.layer.1.attention.output.dense.bias', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.intermediate.dense.weight', 'module.vlbert.encoder.layer.1.intermediate.dense.bias', 'module.vlbert.encoder.layer.1.output.dense.weight', 'module.vlbert.encoder.layer.1.output.dense.bias', 'module.vlbert.encoder.layer.1.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.attention.self.query.weight', 'module.vlbert.encoder.layer.2.attention.self.query.bias', 'module.vlbert.encoder.layer.2.attention.self.key.weight', 'module.vlbert.encoder.layer.2.attention.self.key.bias', 'module.vlbert.encoder.layer.2.attention.self.value.weight', 'module.vlbert.encoder.layer.2.attention.self.value.bias', 'module.vlbert.encoder.layer.2.attention.output.dense.weight', 'module.vlbert.encoder.layer.2.attention.output.dense.bias', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.intermediate.dense.weight', 'module.vlbert.encoder.layer.2.intermediate.dense.bias', 'module.vlbert.encoder.layer.2.output.dense.weight', 'module.vlbert.encoder.layer.2.output.dense.bias', 'module.vlbert.encoder.layer.2.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.attention.self.query.weight', 'module.vlbert.encoder.layer.3.attention.self.query.bias', 'module.vlbert.encoder.layer.3.attention.self.key.weight', 'module.vlbert.encoder.layer.3.attention.self.key.bias', 'module.vlbert.encoder.layer.3.attention.self.value.weight', 'module.vlbert.encoder.layer.3.attention.self.value.bias', 'module.vlbert.encoder.layer.3.attention.output.dense.weight', 'module.vlbert.encoder.layer.3.attention.output.dense.bias', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.intermediate.dense.weight', 'module.vlbert.encoder.layer.3.intermediate.dense.bias', 'module.vlbert.encoder.layer.3.output.dense.weight', 'module.vlbert.encoder.layer.3.output.dense.bias', 'module.vlbert.encoder.layer.3.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.attention.self.query.weight', 'module.vlbert.encoder.layer.4.attention.self.query.bias', 'module.vlbert.encoder.layer.4.attention.self.key.weight', 'module.vlbert.encoder.layer.4.attention.self.key.bias', 'module.vlbert.encoder.layer.4.attention.self.value.weight', 'module.vlbert.encoder.layer.4.attention.self.value.bias', 'module.vlbert.encoder.layer.4.attention.output.dense.weight', 'module.vlbert.encoder.layer.4.attention.output.dense.bias', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.intermediate.dense.weight', 'module.vlbert.encoder.layer.4.intermediate.dense.bias', 'module.vlbert.encoder.layer.4.output.dense.weight', 'module.vlbert.encoder.layer.4.output.dense.bias', 'module.vlbert.encoder.layer.4.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.attention.self.query.weight', 'module.vlbert.encoder.layer.5.attention.self.query.bias', 'module.vlbert.encoder.layer.5.attention.self.key.weight', 'module.vlbert.encoder.layer.5.attention.self.key.bias', 'module.vlbert.encoder.layer.5.attention.self.value.weight', 'module.vlbert.encoder.layer.5.attention.self.value.bias', 'module.vlbert.encoder.layer.5.attention.output.dense.weight', 'module.vlbert.encoder.layer.5.attention.output.dense.bias', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.intermediate.dense.weight', 'module.vlbert.encoder.layer.5.intermediate.dense.bias', 'module.vlbert.encoder.layer.5.output.dense.weight', 'module.vlbert.encoder.layer.5.output.dense.bias', 'module.vlbert.encoder.layer.5.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.attention.self.query.weight', 'module.vlbert.encoder.layer.6.attention.self.query.bias', 'module.vlbert.encoder.layer.6.attention.self.key.weight', 'module.vlbert.encoder.layer.6.attention.self.key.bias', 'module.vlbert.encoder.layer.6.attention.self.value.weight', 'module.vlbert.encoder.layer.6.attention.self.value.bias', 'module.vlbert.encoder.layer.6.attention.output.dense.weight', 'module.vlbert.encoder.layer.6.attention.output.dense.bias', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.intermediate.dense.weight', 'module.vlbert.encoder.layer.6.intermediate.dense.bias', 'module.vlbert.encoder.layer.6.output.dense.weight', 'module.vlbert.encoder.layer.6.output.dense.bias', 'module.vlbert.encoder.layer.6.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.attention.self.query.weight', 'module.vlbert.encoder.layer.7.attention.self.query.bias', 'module.vlbert.encoder.layer.7.attention.self.key.weight', 'module.vlbert.encoder.layer.7.attention.self.key.bias', 'module.vlbert.encoder.layer.7.attention.self.value.weight', 'module.vlbert.encoder.layer.7.attention.self.value.bias', 'module.vlbert.encoder.layer.7.attention.output.dense.weight', 'module.vlbert.encoder.layer.7.attention.output.dense.bias', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.intermediate.dense.weight', 'module.vlbert.encoder.layer.7.intermediate.dense.bias', 'module.vlbert.encoder.layer.7.output.dense.weight', 'module.vlbert.encoder.layer.7.output.dense.bias', 'module.vlbert.encoder.layer.7.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.attention.self.query.weight', 'module.vlbert.encoder.layer.8.attention.self.query.bias', 'module.vlbert.encoder.layer.8.attention.self.key.weight', 'module.vlbert.encoder.layer.8.attention.self.key.bias', 'module.vlbert.encoder.layer.8.attention.self.value.weight', 'module.vlbert.encoder.layer.8.attention.self.value.bias', 'module.vlbert.encoder.layer.8.attention.output.dense.weight', 'module.vlbert.encoder.layer.8.attention.output.dense.bias', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.intermediate.dense.weight', 'module.vlbert.encoder.layer.8.intermediate.dense.bias', 'module.vlbert.encoder.layer.8.output.dense.weight', 'module.vlbert.encoder.layer.8.output.dense.bias', 'module.vlbert.encoder.layer.8.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.attention.self.query.weight', 'module.vlbert.encoder.layer.9.attention.self.query.bias', 'module.vlbert.encoder.layer.9.attention.self.key.weight', 'module.vlbert.encoder.layer.9.attention.self.key.bias', 'module.vlbert.encoder.layer.9.attention.self.value.weight', 'module.vlbert.encoder.layer.9.attention.self.value.bias', 'module.vlbert.encoder.layer.9.attention.output.dense.weight', 'module.vlbert.encoder.layer.9.attention.output.dense.bias', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.intermediate.dense.weight', 'module.vlbert.encoder.layer.9.intermediate.dense.bias', 'module.vlbert.encoder.layer.9.output.dense.weight', 'module.vlbert.encoder.layer.9.output.dense.bias', 'module.vlbert.encoder.layer.9.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.attention.self.query.weight', 'module.vlbert.encoder.layer.10.attention.self.query.bias', 'module.vlbert.encoder.layer.10.attention.self.key.weight', 'module.vlbert.encoder.layer.10.attention.self.key.bias', 'module.vlbert.encoder.layer.10.attention.self.value.weight', 'module.vlbert.encoder.layer.10.attention.self.value.bias', 'module.vlbert.encoder.layer.10.attention.output.dense.weight', 'module.vlbert.encoder.layer.10.attention.output.dense.bias', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.intermediate.dense.weight', 'module.vlbert.encoder.layer.10.intermediate.dense.bias', 'module.vlbert.encoder.layer.10.output.dense.weight', 'module.vlbert.encoder.layer.10.output.dense.bias', 'module.vlbert.encoder.layer.10.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.attention.self.query.weight', 'module.vlbert.encoder.layer.11.attention.self.query.bias', 'module.vlbert.encoder.layer.11.attention.self.key.weight', 'module.vlbert.encoder.layer.11.attention.self.key.bias', 'module.vlbert.encoder.layer.11.attention.self.value.weight', 'module.vlbert.encoder.layer.11.attention.self.value.bias', 'module.vlbert.encoder.layer.11.attention.output.dense.weight', 'module.vlbert.encoder.layer.11.attention.output.dense.bias', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.intermediate.dense.weight', 'module.vlbert.encoder.layer.11.intermediate.dense.bias', 'module.vlbert.encoder.layer.11.output.dense.weight', 'module.vlbert.encoder.layer.11.output.dense.bias', 'module.vlbert.encoder.layer.11.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.output.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.bias', 'module.vlbert.mlm_head.predictions.transform.dense.weight', 'module.vlbert.mlm_head.predictions.transform.dense.bias', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.weight', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.decoder.weight'])
[Partial Load] non matched keys: ['object_mask_word_embedding.weight', 'aux_text_visual_embedding.weight', 'vlbert.mvrc_head.transform.dense.weight', 'vlbert.mvrc_head.transform.dense.bias', 'vlbert.mvrc_head.region_cls_pred.weight', 'vlbert.mvrc_head.region_cls_pred.bias']
[Partial Load] non pretrain keys: []
[Partial Load] partial load state dict of keys: dict_keys(['module.image_feature_extractor.backbone.conv1.weight', 'module.image_feature_extractor.backbone.bn1.weight', 'module.image_feature_extractor.backbone.bn1.bias', 'module.image_feature_extractor.backbone.bn1.running_mean', 'module.image_feature_extractor.backbone.bn1.running_var', 'module.image_feature_extractor.backbone.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.bias', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.bias', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.bias', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.bias', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.bias', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.bias', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.bias', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.bias', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.bias', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.bias', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.bias', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.bias', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.bias', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.bias', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.bias', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.bias', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.bias', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.bias', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.bias', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.bias', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.bias', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.bias', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.bias', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.bias', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.bias', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.bias', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.bias', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.bias', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.bias', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.bias', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.bias', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.bias', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.bias', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.bias', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.bias', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.bias', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.bias', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.bias', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.bias', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.bias', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.bias', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.bias', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.bias', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.bias', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.bias', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.bias', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.bias', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.bias', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.bias', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.bias', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.bias', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.bias', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.bias', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.bias', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.bias', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.bias', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.bias', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.bias', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.bias', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.bias', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.bias', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.bias', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.bias', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.bias', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.bias', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.bias', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.bias', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.bias', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.bias', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.bias', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.bias', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.bias', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.bias', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.bias', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.bias', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.bias', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.bias', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.bias', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.bias', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.bias', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.bias', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.bias', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.bias', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.bias', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.bias', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.bias', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.bias', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.bias', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.bias', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.bias', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv1.weight', 'module.image_feature_extractor.head.0.0.bn1.weight', 'module.image_feature_extractor.head.0.0.bn1.bias', 'module.image_feature_extractor.head.0.0.bn1.running_mean', 'module.image_feature_extractor.head.0.0.bn1.running_var', 'module.image_feature_extractor.head.0.0.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv2.weight', 'module.image_feature_extractor.head.0.0.bn2.weight', 'module.image_feature_extractor.head.0.0.bn2.bias', 'module.image_feature_extractor.head.0.0.bn2.running_mean', 'module.image_feature_extractor.head.0.0.bn2.running_var', 'module.image_feature_extractor.head.0.0.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv3.weight', 'module.image_feature_extractor.head.0.0.bn3.weight', 'module.image_feature_extractor.head.0.0.bn3.bias', 'module.image_feature_extractor.head.0.0.bn3.running_mean', 'module.image_feature_extractor.head.0.0.bn3.running_var', 'module.image_feature_extractor.head.0.0.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.downsample.0.weight', 'module.image_feature_extractor.head.0.0.downsample.1.weight', 'module.image_feature_extractor.head.0.0.downsample.1.bias', 'module.image_feature_extractor.head.0.0.downsample.1.running_mean', 'module.image_feature_extractor.head.0.0.downsample.1.running_var', 'module.image_feature_extractor.head.0.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv1.weight', 'module.image_feature_extractor.head.0.1.bn1.weight', 'module.image_feature_extractor.head.0.1.bn1.bias', 'module.image_feature_extractor.head.0.1.bn1.running_mean', 'module.image_feature_extractor.head.0.1.bn1.running_var', 'module.image_feature_extractor.head.0.1.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv2.weight', 'module.image_feature_extractor.head.0.1.bn2.weight', 'module.image_feature_extractor.head.0.1.bn2.bias', 'module.image_feature_extractor.head.0.1.bn2.running_mean', 'module.image_feature_extractor.head.0.1.bn2.running_var', 'module.image_feature_extractor.head.0.1.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv3.weight', 'module.image_feature_extractor.head.0.1.bn3.weight', 'module.image_feature_extractor.head.0.1.bn3.bias', 'module.image_feature_extractor.head.0.1.bn3.running_mean', 'module.image_feature_extractor.head.0.1.bn3.running_var', 'module.image_feature_extractor.head.0.1.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv1.weight', 'module.image_feature_extractor.head.0.2.bn1.weight', 'module.image_feature_extractor.head.0.2.bn1.bias', 'module.image_feature_extractor.head.0.2.bn1.running_mean', 'module.image_feature_extractor.head.0.2.bn1.running_var', 'module.image_feature_extractor.head.0.2.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv2.weight', 'module.image_feature_extractor.head.0.2.bn2.weight', 'module.image_feature_extractor.head.0.2.bn2.bias', 'module.image_feature_extractor.head.0.2.bn2.running_mean', 'module.image_feature_extractor.head.0.2.bn2.running_var', 'module.image_feature_extractor.head.0.2.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv3.weight', 'module.image_feature_extractor.head.0.2.bn3.weight', 'module.image_feature_extractor.head.0.2.bn3.bias', 'module.image_feature_extractor.head.0.2.bn3.running_mean', 'module.image_feature_extractor.head.0.2.bn3.running_var', 'module.image_feature_extractor.head.0.2.bn3.num_batches_tracked', 'module.image_feature_extractor.obj_downsample.1.weight', 'module.image_feature_extractor.obj_downsample.1.bias', 'module.object_linguistic_embeddings.weight', 'module.vlbert.word_embeddings.weight', 'module.vlbert.end_embedding.weight', 'module.vlbert.position_embeddings.weight', 'module.vlbert.token_type_embeddings.weight', 'module.vlbert.embedding_LayerNorm.weight', 'module.vlbert.embedding_LayerNorm.bias', 'module.vlbert.visual_ln_text.weight', 'module.vlbert.visual_ln_text.bias', 'module.vlbert.visual_ln_object.weight', 'module.vlbert.visual_ln_object.bias', 'module.vlbert.encoder.layer.0.attention.self.query.weight', 'module.vlbert.encoder.layer.0.attention.self.query.bias', 'module.vlbert.encoder.layer.0.attention.self.key.weight', 'module.vlbert.encoder.layer.0.attention.self.key.bias', 'module.vlbert.encoder.layer.0.attention.self.value.weight', 'module.vlbert.encoder.layer.0.attention.self.value.bias', 'module.vlbert.encoder.layer.0.attention.output.dense.weight', 'module.vlbert.encoder.layer.0.attention.output.dense.bias', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.0.intermediate.dense.weight', 'module.vlbert.encoder.layer.0.intermediate.dense.bias', 'module.vlbert.encoder.layer.0.output.dense.weight', 'module.vlbert.encoder.layer.0.output.dense.bias', 'module.vlbert.encoder.layer.0.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.attention.self.query.weight', 'module.vlbert.encoder.layer.1.attention.self.query.bias', 'module.vlbert.encoder.layer.1.attention.self.key.weight', 'module.vlbert.encoder.layer.1.attention.self.key.bias', 'module.vlbert.encoder.layer.1.attention.self.value.weight', 'module.vlbert.encoder.layer.1.attention.self.value.bias', 'module.vlbert.encoder.layer.1.attention.output.dense.weight', 'module.vlbert.encoder.layer.1.attention.output.dense.bias', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.intermediate.dense.weight', 'module.vlbert.encoder.layer.1.intermediate.dense.bias', 'module.vlbert.encoder.layer.1.output.dense.weight', 'module.vlbert.encoder.layer.1.output.dense.bias', 'module.vlbert.encoder.layer.1.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.attention.self.query.weight', 'module.vlbert.encoder.layer.2.attention.self.query.bias', 'module.vlbert.encoder.layer.2.attention.self.key.weight', 'module.vlbert.encoder.layer.2.attention.self.key.bias', 'module.vlbert.encoder.layer.2.attention.self.value.weight', 'module.vlbert.encoder.layer.2.attention.self.value.bias', 'module.vlbert.encoder.layer.2.attention.output.dense.weight', 'module.vlbert.encoder.layer.2.attention.output.dense.bias', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.intermediate.dense.weight', 'module.vlbert.encoder.layer.2.intermediate.dense.bias', 'module.vlbert.encoder.layer.2.output.dense.weight', 'module.vlbert.encoder.layer.2.output.dense.bias', 'module.vlbert.encoder.layer.2.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.attention.self.query.weight', 'module.vlbert.encoder.layer.3.attention.self.query.bias', 'module.vlbert.encoder.layer.3.attention.self.key.weight', 'module.vlbert.encoder.layer.3.attention.self.key.bias', 'module.vlbert.encoder.layer.3.attention.self.value.weight', 'module.vlbert.encoder.layer.3.attention.self.value.bias', 'module.vlbert.encoder.layer.3.attention.output.dense.weight', 'module.vlbert.encoder.layer.3.attention.output.dense.bias', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.intermediate.dense.weight', 'module.vlbert.encoder.layer.3.intermediate.dense.bias', 'module.vlbert.encoder.layer.3.output.dense.weight', 'module.vlbert.encoder.layer.3.output.dense.bias', 'module.vlbert.encoder.layer.3.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.attention.self.query.weight', 'module.vlbert.encoder.layer.4.attention.self.query.bias', 'module.vlbert.encoder.layer.4.attention.self.key.weight', 'module.vlbert.encoder.layer.4.attention.self.key.bias', 'module.vlbert.encoder.layer.4.attention.self.value.weight', 'module.vlbert.encoder.layer.4.attention.self.value.bias', 'module.vlbert.encoder.layer.4.attention.output.dense.weight', 'module.vlbert.encoder.layer.4.attention.output.dense.bias', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.intermediate.dense.weight', 'module.vlbert.encoder.layer.4.intermediate.dense.bias', 'module.vlbert.encoder.layer.4.output.dense.weight', 'module.vlbert.encoder.layer.4.output.dense.bias', 'module.vlbert.encoder.layer.4.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.attention.self.query.weight', 'module.vlbert.encoder.layer.5.attention.self.query.bias', 'module.vlbert.encoder.layer.5.attention.self.key.weight', 'module.vlbert.encoder.layer.5.attention.self.key.bias', 'module.vlbert.encoder.layer.5.attention.self.value.weight', 'module.vlbert.encoder.layer.5.attention.self.value.bias', 'module.vlbert.encoder.layer.5.attention.output.dense.weight', 'module.vlbert.encoder.layer.5.attention.output.dense.bias', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.intermediate.dense.weight', 'module.vlbert.encoder.layer.5.intermediate.dense.bias', 'module.vlbert.encoder.layer.5.output.dense.weight', 'module.vlbert.encoder.layer.5.output.dense.bias', 'module.vlbert.encoder.layer.5.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.attention.self.query.weight', 'module.vlbert.encoder.layer.6.attention.self.query.bias', 'module.vlbert.encoder.layer.6.attention.self.key.weight', 'module.vlbert.encoder.layer.6.attention.self.key.bias', 'module.vlbert.encoder.layer.6.attention.self.value.weight', 'module.vlbert.encoder.layer.6.attention.self.value.bias', 'module.vlbert.encoder.layer.6.attention.output.dense.weight', 'module.vlbert.encoder.layer.6.attention.output.dense.bias', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.intermediate.dense.weight', 'module.vlbert.encoder.layer.6.intermediate.dense.bias', 'module.vlbert.encoder.layer.6.output.dense.weight', 'module.vlbert.encoder.layer.6.output.dense.bias', 'module.vlbert.encoder.layer.6.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.attention.self.query.weight', 'module.vlbert.encoder.layer.7.attention.self.query.bias', 'module.vlbert.encoder.layer.7.attention.self.key.weight', 'module.vlbert.encoder.layer.7.attention.self.key.bias', 'module.vlbert.encoder.layer.7.attention.self.value.weight', 'module.vlbert.encoder.layer.7.attention.self.value.bias', 'module.vlbert.encoder.layer.7.attention.output.dense.weight', 'module.vlbert.encoder.layer.7.attention.output.dense.bias', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.intermediate.dense.weight', 'module.vlbert.encoder.layer.7.intermediate.dense.bias', 'module.vlbert.encoder.layer.7.output.dense.weight', 'module.vlbert.encoder.layer.7.output.dense.bias', 'module.vlbert.encoder.layer.7.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.attention.self.query.weight', 'module.vlbert.encoder.layer.8.attention.self.query.bias', 'module.vlbert.encoder.layer.8.attention.self.key.weight', 'module.vlbert.encoder.layer.8.attention.self.key.bias', 'module.vlbert.encoder.layer.8.attention.self.value.weight', 'module.vlbert.encoder.layer.8.attention.self.value.bias', 'module.vlbert.encoder.layer.8.attention.output.dense.weight', 'module.vlbert.encoder.layer.8.attention.output.dense.bias', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.intermediate.dense.weight', 'module.vlbert.encoder.layer.8.intermediate.dense.bias', 'module.vlbert.encoder.layer.8.output.dense.weight', 'module.vlbert.encoder.layer.8.output.dense.bias', 'module.vlbert.encoder.layer.8.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.attention.self.query.weight', 'module.vlbert.encoder.layer.9.attention.self.query.bias', 'module.vlbert.encoder.layer.9.attention.self.key.weight', 'module.vlbert.encoder.layer.9.attention.self.key.bias', 'module.vlbert.encoder.layer.9.attention.self.value.weight', 'module.vlbert.encoder.layer.9.attention.self.value.bias', 'module.vlbert.encoder.layer.9.attention.output.dense.weight', 'module.vlbert.encoder.layer.9.attention.output.dense.bias', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.intermediate.dense.weight', 'module.vlbert.encoder.layer.9.intermediate.dense.bias', 'module.vlbert.encoder.layer.9.output.dense.weight', 'module.vlbert.encoder.layer.9.output.dense.bias', 'module.vlbert.encoder.layer.9.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.attention.self.query.weight', 'module.vlbert.encoder.layer.10.attention.self.query.bias', 'module.vlbert.encoder.layer.10.attention.self.key.weight', 'module.vlbert.encoder.layer.10.attention.self.key.bias', 'module.vlbert.encoder.layer.10.attention.self.value.weight', 'module.vlbert.encoder.layer.10.attention.self.value.bias', 'module.vlbert.encoder.layer.10.attention.output.dense.weight', 'module.vlbert.encoder.layer.10.attention.output.dense.bias', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.intermediate.dense.weight', 'module.vlbert.encoder.layer.10.intermediate.dense.bias', 'module.vlbert.encoder.layer.10.output.dense.weight', 'module.vlbert.encoder.layer.10.output.dense.bias', 'module.vlbert.encoder.layer.10.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.attention.self.query.weight', 'module.vlbert.encoder.layer.11.attention.self.query.bias', 'module.vlbert.encoder.layer.11.attention.self.key.weight', 'module.vlbert.encoder.layer.11.attention.self.key.bias', 'module.vlbert.encoder.layer.11.attention.self.value.weight', 'module.vlbert.encoder.layer.11.attention.self.value.bias', 'module.vlbert.encoder.layer.11.attention.output.dense.weight', 'module.vlbert.encoder.layer.11.attention.output.dense.bias', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.intermediate.dense.weight', 'module.vlbert.encoder.layer.11.intermediate.dense.bias', 'module.vlbert.encoder.layer.11.output.dense.weight', 'module.vlbert.encoder.layer.11.output.dense.bias', 'module.vlbert.encoder.layer.11.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.output.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.bias', 'module.vlbert.mlm_head.predictions.transform.dense.weight', 'module.vlbert.mlm_head.predictions.transform.dense.bias', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.weight', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.decoder.weight'])
[Partial Load] non matched keys: ['object_mask_word_embedding.weight', 'aux_text_visual_embedding.weight', 'vlbert.mvrc_head.transform.dense.weight', 'vlbert.mvrc_head.transform.dense.bias', 'vlbert.mvrc_head.region_cls_pred.weight', 'vlbert.mvrc_head.region_cls_pred.bias']
[Partial Load] non pretrain keys: []
[Partial Load] partial load state dict of keys: dict_keys(['module.image_feature_extractor.backbone.conv1.weight', 'module.image_feature_extractor.backbone.bn1.weight', 'module.image_feature_extractor.backbone.bn1.bias', 'module.image_feature_extractor.backbone.bn1.running_mean', 'module.image_feature_extractor.backbone.bn1.running_var', 'module.image_feature_extractor.backbone.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.bias', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.bias', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.bias', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.bias', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.bias', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.bias', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.bias', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.bias', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.bias', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.bias', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.bias', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.bias', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.bias', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.bias', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.bias', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.bias', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.bias', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.bias', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.bias', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.bias', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.bias', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.bias', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.bias', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.bias', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.bias', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.bias', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.bias', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.bias', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.bias', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.bias', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.bias', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.bias', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.bias', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.bias', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.bias', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.bias', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.bias', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.bias', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.bias', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.bias', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.bias', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.bias', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.bias', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.bias', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.bias', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.bias', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.bias', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.bias', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.bias', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.bias', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.bias', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.bias', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.bias', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.bias', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.bias', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.bias', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.bias', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.bias', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.bias', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.bias', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.bias', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.bias', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.bias', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.bias', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.bias', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.bias', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.bias', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.bias', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.bias', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.bias', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.bias', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.bias', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.bias', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.bias', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.bias', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.bias', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.bias', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.bias', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.bias', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.bias', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.bias', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.bias', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.bias', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.bias', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.bias', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.bias', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.bias', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.bias', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.bias', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.bias', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv1.weight', 'module.image_feature_extractor.head.0.0.bn1.weight', 'module.image_feature_extractor.head.0.0.bn1.bias', 'module.image_feature_extractor.head.0.0.bn1.running_mean', 'module.image_feature_extractor.head.0.0.bn1.running_var', 'module.image_feature_extractor.head.0.0.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv2.weight', 'module.image_feature_extractor.head.0.0.bn2.weight', 'module.image_feature_extractor.head.0.0.bn2.bias', 'module.image_feature_extractor.head.0.0.bn2.running_mean', 'module.image_feature_extractor.head.0.0.bn2.running_var', 'module.image_feature_extractor.head.0.0.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv3.weight', 'module.image_feature_extractor.head.0.0.bn3.weight', 'module.image_feature_extractor.head.0.0.bn3.bias', 'module.image_feature_extractor.head.0.0.bn3.running_mean', 'module.image_feature_extractor.head.0.0.bn3.running_var', 'module.image_feature_extractor.head.0.0.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.downsample.0.weight', 'module.image_feature_extractor.head.0.0.downsample.1.weight', 'module.image_feature_extractor.head.0.0.downsample.1.bias', 'module.image_feature_extractor.head.0.0.downsample.1.running_mean', 'module.image_feature_extractor.head.0.0.downsample.1.running_var', 'module.image_feature_extractor.head.0.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv1.weight', 'module.image_feature_extractor.head.0.1.bn1.weight', 'module.image_feature_extractor.head.0.1.bn1.bias', 'module.image_feature_extractor.head.0.1.bn1.running_mean', 'module.image_feature_extractor.head.0.1.bn1.running_var', 'module.image_feature_extractor.head.0.1.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv2.weight', 'module.image_feature_extractor.head.0.1.bn2.weight', 'module.image_feature_extractor.head.0.1.bn2.bias', 'module.image_feature_extractor.head.0.1.bn2.running_mean', 'module.image_feature_extractor.head.0.1.bn2.running_var', 'module.image_feature_extractor.head.0.1.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv3.weight', 'module.image_feature_extractor.head.0.1.bn3.weight', 'module.image_feature_extractor.head.0.1.bn3.bias', 'module.image_feature_extractor.head.0.1.bn3.running_mean', 'module.image_feature_extractor.head.0.1.bn3.running_var', 'module.image_feature_extractor.head.0.1.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv1.weight', 'module.image_feature_extractor.head.0.2.bn1.weight', 'module.image_feature_extractor.head.0.2.bn1.bias', 'module.image_feature_extractor.head.0.2.bn1.running_mean', 'module.image_feature_extractor.head.0.2.bn1.running_var', 'module.image_feature_extractor.head.0.2.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv2.weight', 'module.image_feature_extractor.head.0.2.bn2.weight', 'module.image_feature_extractor.head.0.2.bn2.bias', 'module.image_feature_extractor.head.0.2.bn2.running_mean', 'module.image_feature_extractor.head.0.2.bn2.running_var', 'module.image_feature_extractor.head.0.2.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv3.weight', 'module.image_feature_extractor.head.0.2.bn3.weight', 'module.image_feature_extractor.head.0.2.bn3.bias', 'module.image_feature_extractor.head.0.2.bn3.running_mean', 'module.image_feature_extractor.head.0.2.bn3.running_var', 'module.image_feature_extractor.head.0.2.bn3.num_batches_tracked', 'module.image_feature_extractor.obj_downsample.1.weight', 'module.image_feature_extractor.obj_downsample.1.bias', 'module.object_linguistic_embeddings.weight', 'module.vlbert.word_embeddings.weight', 'module.vlbert.end_embedding.weight', 'module.vlbert.position_embeddings.weight', 'module.vlbert.token_type_embeddings.weight', 'module.vlbert.embedding_LayerNorm.weight', 'module.vlbert.embedding_LayerNorm.bias', 'module.vlbert.visual_ln_text.weight', 'module.vlbert.visual_ln_text.bias', 'module.vlbert.visual_ln_object.weight', 'module.vlbert.visual_ln_object.bias', 'module.vlbert.encoder.layer.0.attention.self.query.weight', 'module.vlbert.encoder.layer.0.attention.self.query.bias', 'module.vlbert.encoder.layer.0.attention.self.key.weight', 'module.vlbert.encoder.layer.0.attention.self.key.bias', 'module.vlbert.encoder.layer.0.attention.self.value.weight', 'module.vlbert.encoder.layer.0.attention.self.value.bias', 'module.vlbert.encoder.layer.0.attention.output.dense.weight', 'module.vlbert.encoder.layer.0.attention.output.dense.bias', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.0.intermediate.dense.weight', 'module.vlbert.encoder.layer.0.intermediate.dense.bias', 'module.vlbert.encoder.layer.0.output.dense.weight', 'module.vlbert.encoder.layer.0.output.dense.bias', 'module.vlbert.encoder.layer.0.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.attention.self.query.weight', 'module.vlbert.encoder.layer.1.attention.self.query.bias', 'module.vlbert.encoder.layer.1.attention.self.key.weight', 'module.vlbert.encoder.layer.1.attention.self.key.bias', 'module.vlbert.encoder.layer.1.attention.self.value.weight', 'module.vlbert.encoder.layer.1.attention.self.value.bias', 'module.vlbert.encoder.layer.1.attention.output.dense.weight', 'module.vlbert.encoder.layer.1.attention.output.dense.bias', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.intermediate.dense.weight', 'module.vlbert.encoder.layer.1.intermediate.dense.bias', 'module.vlbert.encoder.layer.1.output.dense.weight', 'module.vlbert.encoder.layer.1.output.dense.bias', 'module.vlbert.encoder.layer.1.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.attention.self.query.weight', 'module.vlbert.encoder.layer.2.attention.self.query.bias', 'module.vlbert.encoder.layer.2.attention.self.key.weight', 'module.vlbert.encoder.layer.2.attention.self.key.bias', 'module.vlbert.encoder.layer.2.attention.self.value.weight', 'module.vlbert.encoder.layer.2.attention.self.value.bias', 'module.vlbert.encoder.layer.2.attention.output.dense.weight', 'module.vlbert.encoder.layer.2.attention.output.dense.bias', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.intermediate.dense.weight', 'module.vlbert.encoder.layer.2.intermediate.dense.bias', 'module.vlbert.encoder.layer.2.output.dense.weight', 'module.vlbert.encoder.layer.2.output.dense.bias', 'module.vlbert.encoder.layer.2.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.attention.self.query.weight', 'module.vlbert.encoder.layer.3.attention.self.query.bias', 'module.vlbert.encoder.layer.3.attention.self.key.weight', 'module.vlbert.encoder.layer.3.attention.self.key.bias', 'module.vlbert.encoder.layer.3.attention.self.value.weight', 'module.vlbert.encoder.layer.3.attention.self.value.bias', 'module.vlbert.encoder.layer.3.attention.output.dense.weight', 'module.vlbert.encoder.layer.3.attention.output.dense.bias', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.intermediate.dense.weight', 'module.vlbert.encoder.layer.3.intermediate.dense.bias', 'module.vlbert.encoder.layer.3.output.dense.weight', 'module.vlbert.encoder.layer.3.output.dense.bias', 'module.vlbert.encoder.layer.3.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.attention.self.query.weight', 'module.vlbert.encoder.layer.4.attention.self.query.bias', 'module.vlbert.encoder.layer.4.attention.self.key.weight', 'module.vlbert.encoder.layer.4.attention.self.key.bias', 'module.vlbert.encoder.layer.4.attention.self.value.weight', 'module.vlbert.encoder.layer.4.attention.self.value.bias', 'module.vlbert.encoder.layer.4.attention.output.dense.weight', 'module.vlbert.encoder.layer.4.attention.output.dense.bias', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.intermediate.dense.weight', 'module.vlbert.encoder.layer.4.intermediate.dense.bias', 'module.vlbert.encoder.layer.4.output.dense.weight', 'module.vlbert.encoder.layer.4.output.dense.bias', 'module.vlbert.encoder.layer.4.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.attention.self.query.weight', 'module.vlbert.encoder.layer.5.attention.self.query.bias', 'module.vlbert.encoder.layer.5.attention.self.key.weight', 'module.vlbert.encoder.layer.5.attention.self.key.bias', 'module.vlbert.encoder.layer.5.attention.self.value.weight', 'module.vlbert.encoder.layer.5.attention.self.value.bias', 'module.vlbert.encoder.layer.5.attention.output.dense.weight', 'module.vlbert.encoder.layer.5.attention.output.dense.bias', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.intermediate.dense.weight', 'module.vlbert.encoder.layer.5.intermediate.dense.bias', 'module.vlbert.encoder.layer.5.output.dense.weight', 'module.vlbert.encoder.layer.5.output.dense.bias', 'module.vlbert.encoder.layer.5.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.attention.self.query.weight', 'module.vlbert.encoder.layer.6.attention.self.query.bias', 'module.vlbert.encoder.layer.6.attention.self.key.weight', 'module.vlbert.encoder.layer.6.attention.self.key.bias', 'module.vlbert.encoder.layer.6.attention.self.value.weight', 'module.vlbert.encoder.layer.6.attention.self.value.bias', 'module.vlbert.encoder.layer.6.attention.output.dense.weight', 'module.vlbert.encoder.layer.6.attention.output.dense.bias', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.intermediate.dense.weight', 'module.vlbert.encoder.layer.6.intermediate.dense.bias', 'module.vlbert.encoder.layer.6.output.dense.weight', 'module.vlbert.encoder.layer.6.output.dense.bias', 'module.vlbert.encoder.layer.6.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.attention.self.query.weight', 'module.vlbert.encoder.layer.7.attention.self.query.bias', 'module.vlbert.encoder.layer.7.attention.self.key.weight', 'module.vlbert.encoder.layer.7.attention.self.key.bias', 'module.vlbert.encoder.layer.7.attention.self.value.weight', 'module.vlbert.encoder.layer.7.attention.self.value.bias', 'module.vlbert.encoder.layer.7.attention.output.dense.weight', 'module.vlbert.encoder.layer.7.attention.output.dense.bias', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.intermediate.dense.weight', 'module.vlbert.encoder.layer.7.intermediate.dense.bias', 'module.vlbert.encoder.layer.7.output.dense.weight', 'module.vlbert.encoder.layer.7.output.dense.bias', 'module.vlbert.encoder.layer.7.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.attention.self.query.weight', 'module.vlbert.encoder.layer.8.attention.self.query.bias', 'module.vlbert.encoder.layer.8.attention.self.key.weight', 'module.vlbert.encoder.layer.8.attention.self.key.bias', 'module.vlbert.encoder.layer.8.attention.self.value.weight', 'module.vlbert.encoder.layer.8.attention.self.value.bias', 'module.vlbert.encoder.layer.8.attention.output.dense.weight', 'module.vlbert.encoder.layer.8.attention.output.dense.bias', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.intermediate.dense.weight', 'module.vlbert.encoder.layer.8.intermediate.dense.bias', 'module.vlbert.encoder.layer.8.output.dense.weight', 'module.vlbert.encoder.layer.8.output.dense.bias', 'module.vlbert.encoder.layer.8.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.attention.self.query.weight', 'module.vlbert.encoder.layer.9.attention.self.query.bias', 'module.vlbert.encoder.layer.9.attention.self.key.weight', 'module.vlbert.encoder.layer.9.attention.self.key.bias', 'module.vlbert.encoder.layer.9.attention.self.value.weight', 'module.vlbert.encoder.layer.9.attention.self.value.bias', 'module.vlbert.encoder.layer.9.attention.output.dense.weight', 'module.vlbert.encoder.layer.9.attention.output.dense.bias', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.intermediate.dense.weight', 'module.vlbert.encoder.layer.9.intermediate.dense.bias', 'module.vlbert.encoder.layer.9.output.dense.weight', 'module.vlbert.encoder.layer.9.output.dense.bias', 'module.vlbert.encoder.layer.9.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.attention.self.query.weight', 'module.vlbert.encoder.layer.10.attention.self.query.bias', 'module.vlbert.encoder.layer.10.attention.self.key.weight', 'module.vlbert.encoder.layer.10.attention.self.key.bias', 'module.vlbert.encoder.layer.10.attention.self.value.weight', 'module.vlbert.encoder.layer.10.attention.self.value.bias', 'module.vlbert.encoder.layer.10.attention.output.dense.weight', 'module.vlbert.encoder.layer.10.attention.output.dense.bias', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.intermediate.dense.weight', 'module.vlbert.encoder.layer.10.intermediate.dense.bias', 'module.vlbert.encoder.layer.10.output.dense.weight', 'module.vlbert.encoder.layer.10.output.dense.bias', 'module.vlbert.encoder.layer.10.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.attention.self.query.weight', 'module.vlbert.encoder.layer.11.attention.self.query.bias', 'module.vlbert.encoder.layer.11.attention.self.key.weight', 'module.vlbert.encoder.layer.11.attention.self.key.bias', 'module.vlbert.encoder.layer.11.attention.self.value.weight', 'module.vlbert.encoder.layer.11.attention.self.value.bias', 'module.vlbert.encoder.layer.11.attention.output.dense.weight', 'module.vlbert.encoder.layer.11.attention.output.dense.bias', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.intermediate.dense.weight', 'module.vlbert.encoder.layer.11.intermediate.dense.bias', 'module.vlbert.encoder.layer.11.output.dense.weight', 'module.vlbert.encoder.layer.11.output.dense.bias', 'module.vlbert.encoder.layer.11.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.output.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.bias', 'module.vlbert.mlm_head.predictions.transform.dense.weight', 'module.vlbert.mlm_head.predictions.transform.dense.bias', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.weight', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.decoder.weight'])
[Partial Load] non matched keys: ['object_mask_word_embedding.weight', 'aux_text_visual_embedding.weight', 'vlbert.mvrc_head.transform.dense.weight', 'vlbert.mvrc_head.transform.dense.bias', 'vlbert.mvrc_head.region_cls_pred.weight', 'vlbert.mvrc_head.region_cls_pred.bias']
[Partial Load] non pretrain keys: []
[Partial Load] partial load state dict of keys: dict_keys(['module.image_feature_extractor.backbone.conv1.weight', 'module.image_feature_extractor.backbone.bn1.weight', 'module.image_feature_extractor.backbone.bn1.bias', 'module.image_feature_extractor.backbone.bn1.running_mean', 'module.image_feature_extractor.backbone.bn1.running_var', 'module.image_feature_extractor.backbone.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.bias', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.bias', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.bias', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.bias', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.bias', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.bias', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.bias', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.bias', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.bias', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.bias', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.bias', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.bias', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.bias', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.bias', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.bias', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.bias', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.bias', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.bias', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.bias', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.bias', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.bias', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.bias', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.bias', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.bias', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.bias', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.bias', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.bias', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.bias', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.bias', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.bias', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.bias', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.bias', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.bias', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.bias', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.bias', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.bias', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.bias', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.bias', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.bias', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.bias', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.bias', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.bias', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.bias', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.bias', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.bias', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.bias', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.bias', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.bias', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.bias', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.bias', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.bias', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.bias', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.bias', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.bias', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.bias', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.bias', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.bias', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.bias', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.bias', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.bias', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.bias', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.bias', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.bias', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.bias', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.bias', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.bias', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.bias', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.bias', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.bias', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.bias', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.bias', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.bias', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.bias', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.bias', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.bias', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.bias', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.bias', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.bias', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.bias', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.bias', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.bias', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.bias', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.bias', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.bias', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.bias', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.bias', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.bias', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.bias', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.bias', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.bias', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv1.weight', 'module.image_feature_extractor.head.0.0.bn1.weight', 'module.image_feature_extractor.head.0.0.bn1.bias', 'module.image_feature_extractor.head.0.0.bn1.running_mean', 'module.image_feature_extractor.head.0.0.bn1.running_var', 'module.image_feature_extractor.head.0.0.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv2.weight', 'module.image_feature_extractor.head.0.0.bn2.weight', 'module.image_feature_extractor.head.0.0.bn2.bias', 'module.image_feature_extractor.head.0.0.bn2.running_mean', 'module.image_feature_extractor.head.0.0.bn2.running_var', 'module.image_feature_extractor.head.0.0.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv3.weight', 'module.image_feature_extractor.head.0.0.bn3.weight', 'module.image_feature_extractor.head.0.0.bn3.bias', 'module.image_feature_extractor.head.0.0.bn3.running_mean', 'module.image_feature_extractor.head.0.0.bn3.running_var', 'module.image_feature_extractor.head.0.0.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.downsample.0.weight', 'module.image_feature_extractor.head.0.0.downsample.1.weight', 'module.image_feature_extractor.head.0.0.downsample.1.bias', 'module.image_feature_extractor.head.0.0.downsample.1.running_mean', 'module.image_feature_extractor.head.0.0.downsample.1.running_var', 'module.image_feature_extractor.head.0.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv1.weight', 'module.image_feature_extractor.head.0.1.bn1.weight', 'module.image_feature_extractor.head.0.1.bn1.bias', 'module.image_feature_extractor.head.0.1.bn1.running_mean', 'module.image_feature_extractor.head.0.1.bn1.running_var', 'module.image_feature_extractor.head.0.1.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv2.weight', 'module.image_feature_extractor.head.0.1.bn2.weight', 'module.image_feature_extractor.head.0.1.bn2.bias', 'module.image_feature_extractor.head.0.1.bn2.running_mean', 'module.image_feature_extractor.head.0.1.bn2.running_var', 'module.image_feature_extractor.head.0.1.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv3.weight', 'module.image_feature_extractor.head.0.1.bn3.weight', 'module.image_feature_extractor.head.0.1.bn3.bias', 'module.image_feature_extractor.head.0.1.bn3.running_mean', 'module.image_feature_extractor.head.0.1.bn3.running_var', 'module.image_feature_extractor.head.0.1.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv1.weight', 'module.image_feature_extractor.head.0.2.bn1.weight', 'module.image_feature_extractor.head.0.2.bn1.bias', 'module.image_feature_extractor.head.0.2.bn1.running_mean', 'module.image_feature_extractor.head.0.2.bn1.running_var', 'module.image_feature_extractor.head.0.2.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv2.weight', 'module.image_feature_extractor.head.0.2.bn2.weight', 'module.image_feature_extractor.head.0.2.bn2.bias', 'module.image_feature_extractor.head.0.2.bn2.running_mean', 'module.image_feature_extractor.head.0.2.bn2.running_var', 'module.image_feature_extractor.head.0.2.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv3.weight', 'module.image_feature_extractor.head.0.2.bn3.weight', 'module.image_feature_extractor.head.0.2.bn3.bias', 'module.image_feature_extractor.head.0.2.bn3.running_mean', 'module.image_feature_extractor.head.0.2.bn3.running_var', 'module.image_feature_extractor.head.0.2.bn3.num_batches_tracked', 'module.image_feature_extractor.obj_downsample.1.weight', 'module.image_feature_extractor.obj_downsample.1.bias', 'module.object_linguistic_embeddings.weight', 'module.vlbert.word_embeddings.weight', 'module.vlbert.end_embedding.weight', 'module.vlbert.position_embeddings.weight', 'module.vlbert.token_type_embeddings.weight', 'module.vlbert.embedding_LayerNorm.weight', 'module.vlbert.embedding_LayerNorm.bias', 'module.vlbert.visual_ln_text.weight', 'module.vlbert.visual_ln_text.bias', 'module.vlbert.visual_ln_object.weight', 'module.vlbert.visual_ln_object.bias', 'module.vlbert.encoder.layer.0.attention.self.query.weight', 'module.vlbert.encoder.layer.0.attention.self.query.bias', 'module.vlbert.encoder.layer.0.attention.self.key.weight', 'module.vlbert.encoder.layer.0.attention.self.key.bias', 'module.vlbert.encoder.layer.0.attention.self.value.weight', 'module.vlbert.encoder.layer.0.attention.self.value.bias', 'module.vlbert.encoder.layer.0.attention.output.dense.weight', 'module.vlbert.encoder.layer.0.attention.output.dense.bias', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.0.intermediate.dense.weight', 'module.vlbert.encoder.layer.0.intermediate.dense.bias', 'module.vlbert.encoder.layer.0.output.dense.weight', 'module.vlbert.encoder.layer.0.output.dense.bias', 'module.vlbert.encoder.layer.0.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.attention.self.query.weight', 'module.vlbert.encoder.layer.1.attention.self.query.bias', 'module.vlbert.encoder.layer.1.attention.self.key.weight', 'module.vlbert.encoder.layer.1.attention.self.key.bias', 'module.vlbert.encoder.layer.1.attention.self.value.weight', 'module.vlbert.encoder.layer.1.attention.self.value.bias', 'module.vlbert.encoder.layer.1.attention.output.dense.weight', 'module.vlbert.encoder.layer.1.attention.output.dense.bias', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.intermediate.dense.weight', 'module.vlbert.encoder.layer.1.intermediate.dense.bias', 'module.vlbert.encoder.layer.1.output.dense.weight', 'module.vlbert.encoder.layer.1.output.dense.bias', 'module.vlbert.encoder.layer.1.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.attention.self.query.weight', 'module.vlbert.encoder.layer.2.attention.self.query.bias', 'module.vlbert.encoder.layer.2.attention.self.key.weight', 'module.vlbert.encoder.layer.2.attention.self.key.bias', 'module.vlbert.encoder.layer.2.attention.self.value.weight', 'module.vlbert.encoder.layer.2.attention.self.value.bias', 'module.vlbert.encoder.layer.2.attention.output.dense.weight', 'module.vlbert.encoder.layer.2.attention.output.dense.bias', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.intermediate.dense.weight', 'module.vlbert.encoder.layer.2.intermediate.dense.bias', 'module.vlbert.encoder.layer.2.output.dense.weight', 'module.vlbert.encoder.layer.2.output.dense.bias', 'module.vlbert.encoder.layer.2.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.attention.self.query.weight', 'module.vlbert.encoder.layer.3.attention.self.query.bias', 'module.vlbert.encoder.layer.3.attention.self.key.weight', 'module.vlbert.encoder.layer.3.attention.self.key.bias', 'module.vlbert.encoder.layer.3.attention.self.value.weight', 'module.vlbert.encoder.layer.3.attention.self.value.bias', 'module.vlbert.encoder.layer.3.attention.output.dense.weight', 'module.vlbert.encoder.layer.3.attention.output.dense.bias', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.intermediate.dense.weight', 'module.vlbert.encoder.layer.3.intermediate.dense.bias', 'module.vlbert.encoder.layer.3.output.dense.weight', 'module.vlbert.encoder.layer.3.output.dense.bias', 'module.vlbert.encoder.layer.3.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.attention.self.query.weight', 'module.vlbert.encoder.layer.4.attention.self.query.bias', 'module.vlbert.encoder.layer.4.attention.self.key.weight', 'module.vlbert.encoder.layer.4.attention.self.key.bias', 'module.vlbert.encoder.layer.4.attention.self.value.weight', 'module.vlbert.encoder.layer.4.attention.self.value.bias', 'module.vlbert.encoder.layer.4.attention.output.dense.weight', 'module.vlbert.encoder.layer.4.attention.output.dense.bias', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.intermediate.dense.weight', 'module.vlbert.encoder.layer.4.intermediate.dense.bias', 'module.vlbert.encoder.layer.4.output.dense.weight', 'module.vlbert.encoder.layer.4.output.dense.bias', 'module.vlbert.encoder.layer.4.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.attention.self.query.weight', 'module.vlbert.encoder.layer.5.attention.self.query.bias', 'module.vlbert.encoder.layer.5.attention.self.key.weight', 'module.vlbert.encoder.layer.5.attention.self.key.bias', 'module.vlbert.encoder.layer.5.attention.self.value.weight', 'module.vlbert.encoder.layer.5.attention.self.value.bias', 'module.vlbert.encoder.layer.5.attention.output.dense.weight', 'module.vlbert.encoder.layer.5.attention.output.dense.bias', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.intermediate.dense.weight', 'module.vlbert.encoder.layer.5.intermediate.dense.bias', 'module.vlbert.encoder.layer.5.output.dense.weight', 'module.vlbert.encoder.layer.5.output.dense.bias', 'module.vlbert.encoder.layer.5.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.attention.self.query.weight', 'module.vlbert.encoder.layer.6.attention.self.query.bias', 'module.vlbert.encoder.layer.6.attention.self.key.weight', 'module.vlbert.encoder.layer.6.attention.self.key.bias', 'module.vlbert.encoder.layer.6.attention.self.value.weight', 'module.vlbert.encoder.layer.6.attention.self.value.bias', 'module.vlbert.encoder.layer.6.attention.output.dense.weight', 'module.vlbert.encoder.layer.6.attention.output.dense.bias', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.intermediate.dense.weight', 'module.vlbert.encoder.layer.6.intermediate.dense.bias', 'module.vlbert.encoder.layer.6.output.dense.weight', 'module.vlbert.encoder.layer.6.output.dense.bias', 'module.vlbert.encoder.layer.6.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.attention.self.query.weight', 'module.vlbert.encoder.layer.7.attention.self.query.bias', 'module.vlbert.encoder.layer.7.attention.self.key.weight', 'module.vlbert.encoder.layer.7.attention.self.key.bias', 'module.vlbert.encoder.layer.7.attention.self.value.weight', 'module.vlbert.encoder.layer.7.attention.self.value.bias', 'module.vlbert.encoder.layer.7.attention.output.dense.weight', 'module.vlbert.encoder.layer.7.attention.output.dense.bias', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.intermediate.dense.weight', 'module.vlbert.encoder.layer.7.intermediate.dense.bias', 'module.vlbert.encoder.layer.7.output.dense.weight', 'module.vlbert.encoder.layer.7.output.dense.bias', 'module.vlbert.encoder.layer.7.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.attention.self.query.weight', 'module.vlbert.encoder.layer.8.attention.self.query.bias', 'module.vlbert.encoder.layer.8.attention.self.key.weight', 'module.vlbert.encoder.layer.8.attention.self.key.bias', 'module.vlbert.encoder.layer.8.attention.self.value.weight', 'module.vlbert.encoder.layer.8.attention.self.value.bias', 'module.vlbert.encoder.layer.8.attention.output.dense.weight', 'module.vlbert.encoder.layer.8.attention.output.dense.bias', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.intermediate.dense.weight', 'module.vlbert.encoder.layer.8.intermediate.dense.bias', 'module.vlbert.encoder.layer.8.output.dense.weight', 'module.vlbert.encoder.layer.8.output.dense.bias', 'module.vlbert.encoder.layer.8.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.attention.self.query.weight', 'module.vlbert.encoder.layer.9.attention.self.query.bias', 'module.vlbert.encoder.layer.9.attention.self.key.weight', 'module.vlbert.encoder.layer.9.attention.self.key.bias', 'module.vlbert.encoder.layer.9.attention.self.value.weight', 'module.vlbert.encoder.layer.9.attention.self.value.bias', 'module.vlbert.encoder.layer.9.attention.output.dense.weight', 'module.vlbert.encoder.layer.9.attention.output.dense.bias', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.intermediate.dense.weight', 'module.vlbert.encoder.layer.9.intermediate.dense.bias', 'module.vlbert.encoder.layer.9.output.dense.weight', 'module.vlbert.encoder.layer.9.output.dense.bias', 'module.vlbert.encoder.layer.9.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.attention.self.query.weight', 'module.vlbert.encoder.layer.10.attention.self.query.bias', 'module.vlbert.encoder.layer.10.attention.self.key.weight', 'module.vlbert.encoder.layer.10.attention.self.key.bias', 'module.vlbert.encoder.layer.10.attention.self.value.weight', 'module.vlbert.encoder.layer.10.attention.self.value.bias', 'module.vlbert.encoder.layer.10.attention.output.dense.weight', 'module.vlbert.encoder.layer.10.attention.output.dense.bias', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.intermediate.dense.weight', 'module.vlbert.encoder.layer.10.intermediate.dense.bias', 'module.vlbert.encoder.layer.10.output.dense.weight', 'module.vlbert.encoder.layer.10.output.dense.bias', 'module.vlbert.encoder.layer.10.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.attention.self.query.weight', 'module.vlbert.encoder.layer.11.attention.self.query.bias', 'module.vlbert.encoder.layer.11.attention.self.key.weight', 'module.vlbert.encoder.layer.11.attention.self.key.bias', 'module.vlbert.encoder.layer.11.attention.self.value.weight', 'module.vlbert.encoder.layer.11.attention.self.value.bias', 'module.vlbert.encoder.layer.11.attention.output.dense.weight', 'module.vlbert.encoder.layer.11.attention.output.dense.bias', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.intermediate.dense.weight', 'module.vlbert.encoder.layer.11.intermediate.dense.bias', 'module.vlbert.encoder.layer.11.output.dense.weight', 'module.vlbert.encoder.layer.11.output.dense.bias', 'module.vlbert.encoder.layer.11.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.output.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.bias', 'module.vlbert.mlm_head.predictions.transform.dense.weight', 'module.vlbert.mlm_head.predictions.transform.dense.bias', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.weight', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.decoder.weight'])
[Partial Load] non matched keys: ['object_mask_word_embedding.weight', 'aux_text_visual_embedding.weight', 'vlbert.mvrc_head.transform.dense.weight', 'vlbert.mvrc_head.transform.dense.bias', 'vlbert.mvrc_head.region_cls_pred.weight', 'vlbert.mvrc_head.region_cls_pred.bias']
[Partial Load] non pretrain keys: []
[Partial Load] partial load state dict of keys: dict_keys(['module.image_feature_extractor.backbone.conv1.weight', 'module.image_feature_extractor.backbone.bn1.weight', 'module.image_feature_extractor.backbone.bn1.bias', 'module.image_feature_extractor.backbone.bn1.running_mean', 'module.image_feature_extractor.backbone.bn1.running_var', 'module.image_feature_extractor.backbone.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.weight', 'module.image_feature_extractor.backbone.layer1.0.bn1.bias', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.weight', 'module.image_feature_extractor.backbone.layer1.0.bn2.bias', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.conv3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.weight', 'module.image_feature_extractor.backbone.layer1.0.bn3.bias', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer1.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.weight', 'module.image_feature_extractor.backbone.layer1.1.bn1.bias', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.weight', 'module.image_feature_extractor.backbone.layer1.1.bn2.bias', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.1.conv3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.weight', 'module.image_feature_extractor.backbone.layer1.1.bn3.bias', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.weight', 'module.image_feature_extractor.backbone.layer1.2.bn1.bias', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.weight', 'module.image_feature_extractor.backbone.layer1.2.bn2.bias', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer1.2.conv3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.weight', 'module.image_feature_extractor.backbone.layer1.2.bn3.bias', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer1.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer1.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.weight', 'module.image_feature_extractor.backbone.layer2.0.bn1.bias', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.weight', 'module.image_feature_extractor.backbone.layer2.0.bn2.bias', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.conv3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.weight', 'module.image_feature_extractor.backbone.layer2.0.bn3.bias', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer2.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.weight', 'module.image_feature_extractor.backbone.layer2.1.bn1.bias', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.weight', 'module.image_feature_extractor.backbone.layer2.1.bn2.bias', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.1.conv3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.weight', 'module.image_feature_extractor.backbone.layer2.1.bn3.bias', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.weight', 'module.image_feature_extractor.backbone.layer2.2.bn1.bias', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.weight', 'module.image_feature_extractor.backbone.layer2.2.bn2.bias', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.2.conv3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.weight', 'module.image_feature_extractor.backbone.layer2.2.bn3.bias', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.weight', 'module.image_feature_extractor.backbone.layer2.3.bn1.bias', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.weight', 'module.image_feature_extractor.backbone.layer2.3.bn2.bias', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer2.3.conv3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.weight', 'module.image_feature_extractor.backbone.layer2.3.bn3.bias', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer2.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer2.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.weight', 'module.image_feature_extractor.backbone.layer3.0.bn1.bias', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.weight', 'module.image_feature_extractor.backbone.layer3.0.bn2.bias', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.conv3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.weight', 'module.image_feature_extractor.backbone.layer3.0.bn3.bias', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.0.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.0.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.0.downsample.0.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.weight', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.bias', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_mean', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.running_var', 'module.image_feature_extractor.backbone.layer3.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.weight', 'module.image_feature_extractor.backbone.layer3.1.bn1.bias', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.weight', 'module.image_feature_extractor.backbone.layer3.1.bn2.bias', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.1.conv3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.weight', 'module.image_feature_extractor.backbone.layer3.1.bn3.bias', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.1.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.1.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.weight', 'module.image_feature_extractor.backbone.layer3.2.bn1.bias', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.weight', 'module.image_feature_extractor.backbone.layer3.2.bn2.bias', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.2.conv3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.weight', 'module.image_feature_extractor.backbone.layer3.2.bn3.bias', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.2.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.2.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.weight', 'module.image_feature_extractor.backbone.layer3.3.bn1.bias', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.weight', 'module.image_feature_extractor.backbone.layer3.3.bn2.bias', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.3.conv3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.weight', 'module.image_feature_extractor.backbone.layer3.3.bn3.bias', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.3.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.3.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.weight', 'module.image_feature_extractor.backbone.layer3.4.bn1.bias', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.weight', 'module.image_feature_extractor.backbone.layer3.4.bn2.bias', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.4.conv3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.weight', 'module.image_feature_extractor.backbone.layer3.4.bn3.bias', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.4.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.4.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.weight', 'module.image_feature_extractor.backbone.layer3.5.bn1.bias', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.weight', 'module.image_feature_extractor.backbone.layer3.5.bn2.bias', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.5.conv3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.weight', 'module.image_feature_extractor.backbone.layer3.5.bn3.bias', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.5.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.5.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.weight', 'module.image_feature_extractor.backbone.layer3.6.bn1.bias', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.weight', 'module.image_feature_extractor.backbone.layer3.6.bn2.bias', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.6.conv3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.weight', 'module.image_feature_extractor.backbone.layer3.6.bn3.bias', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.6.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.6.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.weight', 'module.image_feature_extractor.backbone.layer3.7.bn1.bias', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.weight', 'module.image_feature_extractor.backbone.layer3.7.bn2.bias', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.7.conv3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.weight', 'module.image_feature_extractor.backbone.layer3.7.bn3.bias', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.7.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.7.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.weight', 'module.image_feature_extractor.backbone.layer3.8.bn1.bias', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.weight', 'module.image_feature_extractor.backbone.layer3.8.bn2.bias', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.8.conv3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.weight', 'module.image_feature_extractor.backbone.layer3.8.bn3.bias', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.8.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.8.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.weight', 'module.image_feature_extractor.backbone.layer3.9.bn1.bias', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.weight', 'module.image_feature_extractor.backbone.layer3.9.bn2.bias', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.9.conv3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.weight', 'module.image_feature_extractor.backbone.layer3.9.bn3.bias', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.9.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.9.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.weight', 'module.image_feature_extractor.backbone.layer3.10.bn1.bias', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.weight', 'module.image_feature_extractor.backbone.layer3.10.bn2.bias', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.10.conv3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.weight', 'module.image_feature_extractor.backbone.layer3.10.bn3.bias', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.10.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.10.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.weight', 'module.image_feature_extractor.backbone.layer3.11.bn1.bias', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.weight', 'module.image_feature_extractor.backbone.layer3.11.bn2.bias', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.11.conv3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.weight', 'module.image_feature_extractor.backbone.layer3.11.bn3.bias', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.11.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.11.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.weight', 'module.image_feature_extractor.backbone.layer3.12.bn1.bias', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.weight', 'module.image_feature_extractor.backbone.layer3.12.bn2.bias', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.12.conv3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.weight', 'module.image_feature_extractor.backbone.layer3.12.bn3.bias', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.12.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.12.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.weight', 'module.image_feature_extractor.backbone.layer3.13.bn1.bias', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.weight', 'module.image_feature_extractor.backbone.layer3.13.bn2.bias', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.13.conv3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.weight', 'module.image_feature_extractor.backbone.layer3.13.bn3.bias', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.13.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.13.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.weight', 'module.image_feature_extractor.backbone.layer3.14.bn1.bias', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.weight', 'module.image_feature_extractor.backbone.layer3.14.bn2.bias', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.14.conv3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.weight', 'module.image_feature_extractor.backbone.layer3.14.bn3.bias', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.14.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.14.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.weight', 'module.image_feature_extractor.backbone.layer3.15.bn1.bias', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.weight', 'module.image_feature_extractor.backbone.layer3.15.bn2.bias', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.15.conv3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.weight', 'module.image_feature_extractor.backbone.layer3.15.bn3.bias', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.15.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.15.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.weight', 'module.image_feature_extractor.backbone.layer3.16.bn1.bias', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.weight', 'module.image_feature_extractor.backbone.layer3.16.bn2.bias', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.16.conv3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.weight', 'module.image_feature_extractor.backbone.layer3.16.bn3.bias', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.16.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.16.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.weight', 'module.image_feature_extractor.backbone.layer3.17.bn1.bias', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.weight', 'module.image_feature_extractor.backbone.layer3.17.bn2.bias', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.17.conv3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.weight', 'module.image_feature_extractor.backbone.layer3.17.bn3.bias', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.17.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.17.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.weight', 'module.image_feature_extractor.backbone.layer3.18.bn1.bias', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.weight', 'module.image_feature_extractor.backbone.layer3.18.bn2.bias', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.18.conv3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.weight', 'module.image_feature_extractor.backbone.layer3.18.bn3.bias', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.18.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.18.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.weight', 'module.image_feature_extractor.backbone.layer3.19.bn1.bias', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.weight', 'module.image_feature_extractor.backbone.layer3.19.bn2.bias', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.19.conv3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.weight', 'module.image_feature_extractor.backbone.layer3.19.bn3.bias', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.19.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.19.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.weight', 'module.image_feature_extractor.backbone.layer3.20.bn1.bias', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.weight', 'module.image_feature_extractor.backbone.layer3.20.bn2.bias', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.20.conv3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.weight', 'module.image_feature_extractor.backbone.layer3.20.bn3.bias', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.20.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.20.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.weight', 'module.image_feature_extractor.backbone.layer3.21.bn1.bias', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.weight', 'module.image_feature_extractor.backbone.layer3.21.bn2.bias', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.21.conv3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.weight', 'module.image_feature_extractor.backbone.layer3.21.bn3.bias', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.21.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.21.bn3.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.weight', 'module.image_feature_extractor.backbone.layer3.22.bn1.bias', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn1.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn1.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.weight', 'module.image_feature_extractor.backbone.layer3.22.bn2.bias', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn2.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn2.num_batches_tracked', 'module.image_feature_extractor.backbone.layer3.22.conv3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.weight', 'module.image_feature_extractor.backbone.layer3.22.bn3.bias', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_mean', 'module.image_feature_extractor.backbone.layer3.22.bn3.running_var', 'module.image_feature_extractor.backbone.layer3.22.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.0.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.1.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.1.bn3.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn1.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn2.num_batches_tracked', 'module.image_feature_extractor.roi_head_feature_extractor.2.conv3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.weight', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.bias', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_mean', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.running_var', 'module.image_feature_extractor.roi_head_feature_extractor.2.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv1.weight', 'module.image_feature_extractor.head.0.0.bn1.weight', 'module.image_feature_extractor.head.0.0.bn1.bias', 'module.image_feature_extractor.head.0.0.bn1.running_mean', 'module.image_feature_extractor.head.0.0.bn1.running_var', 'module.image_feature_extractor.head.0.0.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv2.weight', 'module.image_feature_extractor.head.0.0.bn2.weight', 'module.image_feature_extractor.head.0.0.bn2.bias', 'module.image_feature_extractor.head.0.0.bn2.running_mean', 'module.image_feature_extractor.head.0.0.bn2.running_var', 'module.image_feature_extractor.head.0.0.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.0.conv3.weight', 'module.image_feature_extractor.head.0.0.bn3.weight', 'module.image_feature_extractor.head.0.0.bn3.bias', 'module.image_feature_extractor.head.0.0.bn3.running_mean', 'module.image_feature_extractor.head.0.0.bn3.running_var', 'module.image_feature_extractor.head.0.0.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.0.downsample.0.weight', 'module.image_feature_extractor.head.0.0.downsample.1.weight', 'module.image_feature_extractor.head.0.0.downsample.1.bias', 'module.image_feature_extractor.head.0.0.downsample.1.running_mean', 'module.image_feature_extractor.head.0.0.downsample.1.running_var', 'module.image_feature_extractor.head.0.0.downsample.1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv1.weight', 'module.image_feature_extractor.head.0.1.bn1.weight', 'module.image_feature_extractor.head.0.1.bn1.bias', 'module.image_feature_extractor.head.0.1.bn1.running_mean', 'module.image_feature_extractor.head.0.1.bn1.running_var', 'module.image_feature_extractor.head.0.1.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv2.weight', 'module.image_feature_extractor.head.0.1.bn2.weight', 'module.image_feature_extractor.head.0.1.bn2.bias', 'module.image_feature_extractor.head.0.1.bn2.running_mean', 'module.image_feature_extractor.head.0.1.bn2.running_var', 'module.image_feature_extractor.head.0.1.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.1.conv3.weight', 'module.image_feature_extractor.head.0.1.bn3.weight', 'module.image_feature_extractor.head.0.1.bn3.bias', 'module.image_feature_extractor.head.0.1.bn3.running_mean', 'module.image_feature_extractor.head.0.1.bn3.running_var', 'module.image_feature_extractor.head.0.1.bn3.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv1.weight', 'module.image_feature_extractor.head.0.2.bn1.weight', 'module.image_feature_extractor.head.0.2.bn1.bias', 'module.image_feature_extractor.head.0.2.bn1.running_mean', 'module.image_feature_extractor.head.0.2.bn1.running_var', 'module.image_feature_extractor.head.0.2.bn1.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv2.weight', 'module.image_feature_extractor.head.0.2.bn2.weight', 'module.image_feature_extractor.head.0.2.bn2.bias', 'module.image_feature_extractor.head.0.2.bn2.running_mean', 'module.image_feature_extractor.head.0.2.bn2.running_var', 'module.image_feature_extractor.head.0.2.bn2.num_batches_tracked', 'module.image_feature_extractor.head.0.2.conv3.weight', 'module.image_feature_extractor.head.0.2.bn3.weight', 'module.image_feature_extractor.head.0.2.bn3.bias', 'module.image_feature_extractor.head.0.2.bn3.running_mean', 'module.image_feature_extractor.head.0.2.bn3.running_var', 'module.image_feature_extractor.head.0.2.bn3.num_batches_tracked', 'module.image_feature_extractor.obj_downsample.1.weight', 'module.image_feature_extractor.obj_downsample.1.bias', 'module.object_linguistic_embeddings.weight', 'module.vlbert.word_embeddings.weight', 'module.vlbert.end_embedding.weight', 'module.vlbert.position_embeddings.weight', 'module.vlbert.token_type_embeddings.weight', 'module.vlbert.embedding_LayerNorm.weight', 'module.vlbert.embedding_LayerNorm.bias', 'module.vlbert.visual_ln_text.weight', 'module.vlbert.visual_ln_text.bias', 'module.vlbert.visual_ln_object.weight', 'module.vlbert.visual_ln_object.bias', 'module.vlbert.encoder.layer.0.attention.self.query.weight', 'module.vlbert.encoder.layer.0.attention.self.query.bias', 'module.vlbert.encoder.layer.0.attention.self.key.weight', 'module.vlbert.encoder.layer.0.attention.self.key.bias', 'module.vlbert.encoder.layer.0.attention.self.value.weight', 'module.vlbert.encoder.layer.0.attention.self.value.bias', 'module.vlbert.encoder.layer.0.attention.output.dense.weight', 'module.vlbert.encoder.layer.0.attention.output.dense.bias', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.0.intermediate.dense.weight', 'module.vlbert.encoder.layer.0.intermediate.dense.bias', 'module.vlbert.encoder.layer.0.output.dense.weight', 'module.vlbert.encoder.layer.0.output.dense.bias', 'module.vlbert.encoder.layer.0.output.LayerNorm.weight', 'module.vlbert.encoder.layer.0.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.attention.self.query.weight', 'module.vlbert.encoder.layer.1.attention.self.query.bias', 'module.vlbert.encoder.layer.1.attention.self.key.weight', 'module.vlbert.encoder.layer.1.attention.self.key.bias', 'module.vlbert.encoder.layer.1.attention.self.value.weight', 'module.vlbert.encoder.layer.1.attention.self.value.bias', 'module.vlbert.encoder.layer.1.attention.output.dense.weight', 'module.vlbert.encoder.layer.1.attention.output.dense.bias', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.1.intermediate.dense.weight', 'module.vlbert.encoder.layer.1.intermediate.dense.bias', 'module.vlbert.encoder.layer.1.output.dense.weight', 'module.vlbert.encoder.layer.1.output.dense.bias', 'module.vlbert.encoder.layer.1.output.LayerNorm.weight', 'module.vlbert.encoder.layer.1.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.attention.self.query.weight', 'module.vlbert.encoder.layer.2.attention.self.query.bias', 'module.vlbert.encoder.layer.2.attention.self.key.weight', 'module.vlbert.encoder.layer.2.attention.self.key.bias', 'module.vlbert.encoder.layer.2.attention.self.value.weight', 'module.vlbert.encoder.layer.2.attention.self.value.bias', 'module.vlbert.encoder.layer.2.attention.output.dense.weight', 'module.vlbert.encoder.layer.2.attention.output.dense.bias', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.2.intermediate.dense.weight', 'module.vlbert.encoder.layer.2.intermediate.dense.bias', 'module.vlbert.encoder.layer.2.output.dense.weight', 'module.vlbert.encoder.layer.2.output.dense.bias', 'module.vlbert.encoder.layer.2.output.LayerNorm.weight', 'module.vlbert.encoder.layer.2.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.attention.self.query.weight', 'module.vlbert.encoder.layer.3.attention.self.query.bias', 'module.vlbert.encoder.layer.3.attention.self.key.weight', 'module.vlbert.encoder.layer.3.attention.self.key.bias', 'module.vlbert.encoder.layer.3.attention.self.value.weight', 'module.vlbert.encoder.layer.3.attention.self.value.bias', 'module.vlbert.encoder.layer.3.attention.output.dense.weight', 'module.vlbert.encoder.layer.3.attention.output.dense.bias', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.3.intermediate.dense.weight', 'module.vlbert.encoder.layer.3.intermediate.dense.bias', 'module.vlbert.encoder.layer.3.output.dense.weight', 'module.vlbert.encoder.layer.3.output.dense.bias', 'module.vlbert.encoder.layer.3.output.LayerNorm.weight', 'module.vlbert.encoder.layer.3.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.attention.self.query.weight', 'module.vlbert.encoder.layer.4.attention.self.query.bias', 'module.vlbert.encoder.layer.4.attention.self.key.weight', 'module.vlbert.encoder.layer.4.attention.self.key.bias', 'module.vlbert.encoder.layer.4.attention.self.value.weight', 'module.vlbert.encoder.layer.4.attention.self.value.bias', 'module.vlbert.encoder.layer.4.attention.output.dense.weight', 'module.vlbert.encoder.layer.4.attention.output.dense.bias', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.4.intermediate.dense.weight', 'module.vlbert.encoder.layer.4.intermediate.dense.bias', 'module.vlbert.encoder.layer.4.output.dense.weight', 'module.vlbert.encoder.layer.4.output.dense.bias', 'module.vlbert.encoder.layer.4.output.LayerNorm.weight', 'module.vlbert.encoder.layer.4.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.attention.self.query.weight', 'module.vlbert.encoder.layer.5.attention.self.query.bias', 'module.vlbert.encoder.layer.5.attention.self.key.weight', 'module.vlbert.encoder.layer.5.attention.self.key.bias', 'module.vlbert.encoder.layer.5.attention.self.value.weight', 'module.vlbert.encoder.layer.5.attention.self.value.bias', 'module.vlbert.encoder.layer.5.attention.output.dense.weight', 'module.vlbert.encoder.layer.5.attention.output.dense.bias', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.5.intermediate.dense.weight', 'module.vlbert.encoder.layer.5.intermediate.dense.bias', 'module.vlbert.encoder.layer.5.output.dense.weight', 'module.vlbert.encoder.layer.5.output.dense.bias', 'module.vlbert.encoder.layer.5.output.LayerNorm.weight', 'module.vlbert.encoder.layer.5.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.attention.self.query.weight', 'module.vlbert.encoder.layer.6.attention.self.query.bias', 'module.vlbert.encoder.layer.6.attention.self.key.weight', 'module.vlbert.encoder.layer.6.attention.self.key.bias', 'module.vlbert.encoder.layer.6.attention.self.value.weight', 'module.vlbert.encoder.layer.6.attention.self.value.bias', 'module.vlbert.encoder.layer.6.attention.output.dense.weight', 'module.vlbert.encoder.layer.6.attention.output.dense.bias', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.6.intermediate.dense.weight', 'module.vlbert.encoder.layer.6.intermediate.dense.bias', 'module.vlbert.encoder.layer.6.output.dense.weight', 'module.vlbert.encoder.layer.6.output.dense.bias', 'module.vlbert.encoder.layer.6.output.LayerNorm.weight', 'module.vlbert.encoder.layer.6.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.attention.self.query.weight', 'module.vlbert.encoder.layer.7.attention.self.query.bias', 'module.vlbert.encoder.layer.7.attention.self.key.weight', 'module.vlbert.encoder.layer.7.attention.self.key.bias', 'module.vlbert.encoder.layer.7.attention.self.value.weight', 'module.vlbert.encoder.layer.7.attention.self.value.bias', 'module.vlbert.encoder.layer.7.attention.output.dense.weight', 'module.vlbert.encoder.layer.7.attention.output.dense.bias', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.7.intermediate.dense.weight', 'module.vlbert.encoder.layer.7.intermediate.dense.bias', 'module.vlbert.encoder.layer.7.output.dense.weight', 'module.vlbert.encoder.layer.7.output.dense.bias', 'module.vlbert.encoder.layer.7.output.LayerNorm.weight', 'module.vlbert.encoder.layer.7.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.attention.self.query.weight', 'module.vlbert.encoder.layer.8.attention.self.query.bias', 'module.vlbert.encoder.layer.8.attention.self.key.weight', 'module.vlbert.encoder.layer.8.attention.self.key.bias', 'module.vlbert.encoder.layer.8.attention.self.value.weight', 'module.vlbert.encoder.layer.8.attention.self.value.bias', 'module.vlbert.encoder.layer.8.attention.output.dense.weight', 'module.vlbert.encoder.layer.8.attention.output.dense.bias', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.8.intermediate.dense.weight', 'module.vlbert.encoder.layer.8.intermediate.dense.bias', 'module.vlbert.encoder.layer.8.output.dense.weight', 'module.vlbert.encoder.layer.8.output.dense.bias', 'module.vlbert.encoder.layer.8.output.LayerNorm.weight', 'module.vlbert.encoder.layer.8.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.attention.self.query.weight', 'module.vlbert.encoder.layer.9.attention.self.query.bias', 'module.vlbert.encoder.layer.9.attention.self.key.weight', 'module.vlbert.encoder.layer.9.attention.self.key.bias', 'module.vlbert.encoder.layer.9.attention.self.value.weight', 'module.vlbert.encoder.layer.9.attention.self.value.bias', 'module.vlbert.encoder.layer.9.attention.output.dense.weight', 'module.vlbert.encoder.layer.9.attention.output.dense.bias', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.9.intermediate.dense.weight', 'module.vlbert.encoder.layer.9.intermediate.dense.bias', 'module.vlbert.encoder.layer.9.output.dense.weight', 'module.vlbert.encoder.layer.9.output.dense.bias', 'module.vlbert.encoder.layer.9.output.LayerNorm.weight', 'module.vlbert.encoder.layer.9.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.attention.self.query.weight', 'module.vlbert.encoder.layer.10.attention.self.query.bias', 'module.vlbert.encoder.layer.10.attention.self.key.weight', 'module.vlbert.encoder.layer.10.attention.self.key.bias', 'module.vlbert.encoder.layer.10.attention.self.value.weight', 'module.vlbert.encoder.layer.10.attention.self.value.bias', 'module.vlbert.encoder.layer.10.attention.output.dense.weight', 'module.vlbert.encoder.layer.10.attention.output.dense.bias', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.10.intermediate.dense.weight', 'module.vlbert.encoder.layer.10.intermediate.dense.bias', 'module.vlbert.encoder.layer.10.output.dense.weight', 'module.vlbert.encoder.layer.10.output.dense.bias', 'module.vlbert.encoder.layer.10.output.LayerNorm.weight', 'module.vlbert.encoder.layer.10.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.attention.self.query.weight', 'module.vlbert.encoder.layer.11.attention.self.query.bias', 'module.vlbert.encoder.layer.11.attention.self.key.weight', 'module.vlbert.encoder.layer.11.attention.self.key.bias', 'module.vlbert.encoder.layer.11.attention.self.value.weight', 'module.vlbert.encoder.layer.11.attention.self.value.bias', 'module.vlbert.encoder.layer.11.attention.output.dense.weight', 'module.vlbert.encoder.layer.11.attention.output.dense.bias', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.attention.output.LayerNorm.bias', 'module.vlbert.encoder.layer.11.intermediate.dense.weight', 'module.vlbert.encoder.layer.11.intermediate.dense.bias', 'module.vlbert.encoder.layer.11.output.dense.weight', 'module.vlbert.encoder.layer.11.output.dense.bias', 'module.vlbert.encoder.layer.11.output.LayerNorm.weight', 'module.vlbert.encoder.layer.11.output.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.bias', 'module.vlbert.mlm_head.predictions.transform.dense.weight', 'module.vlbert.mlm_head.predictions.transform.dense.bias', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.weight', 'module.vlbert.mlm_head.predictions.transform.LayerNorm.bias', 'module.vlbert.mlm_head.predictions.decoder.weight'])
[Partial Load] non matched keys: ['object_mask_word_embedding.weight', 'aux_text_visual_embedding.weight', 'vlbert.mvrc_head.transform.dense.weight', 'vlbert.mvrc_head.transform.dense.bias', 'vlbert.mvrc_head.region_cls_pred.weight', 'vlbert.mvrc_head.region_cls_pred.bias']
[Partial Load] non pretrain keys: []
PROGRESS: 0.00%
PROGRESS: 0.00%
PROGRESS: 0.00%
PROGRESS: 0.00%
PROGRESS: 0.00%
PROGRESS: 0.00%
____ torch.Size([2, 38])
____ torch.Size([2, 35])
____ torch.Size([2, 7])
____ torch.Size([2, 19])
____ torch.Size([2, 25])
____ torch.Size([2, 12])
Rank[ 3]Epoch[0] Batch [0] Speed: - samples/sec ETA: - d - h - m Train-MLMAcc=nan, MLMLossWVC=0.000000, MVRCLoss=0.473684,
Command to run:
NCCL_DEBUG=WARN CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 ./scripts/dist_run_single.sh 5 contrastive_pretrain/train_end2end.py ./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml ./checkpoints_debug2 | tee debug2.txt
Namespace(cfg='./cfgs/contrastive_pretrain/base_prec_random_movienet_images_4x16G_fp32.yaml', cudnn_off=False, dist=True, do_test=False, log_dir='./checkpoints_debug2/./output/vl-bert/contrastive_random_images/base_prec_random_movienet_images_4x16G_fp32/train_train/tensorboard_logs', model_dir='./checkpoints_debug2', slurm=False)
{'CHECKPOINT_FREQUENT': 1,
'DATASET': {'ADD_IMAGE_AS_A_BOX': True,
'ANSWER_VOCAB_FILE': '',
'ANSWER_VOCAB_SIZE': 3129,
'APPEND_INDEX': False,
'BASIC_ALIGN': False,
'CACHE_MODE': False,
'DATASET': 'movienet',
'DATASET_PATH': '/proj/vondrick/datasets/MovieNet',
'IGNORE_DB_CACHE': False,
'LABEL_INDEX_IN_BATCH': -1,
'MASK_SIZE': 14,
'MIN_SEQ_LEN': 0,
'ONLY_USE_RELEVANT_DETS': True,
'QA2R_AUG': False,
'QA2R_NOQ': False,
'ROOT_PATH': './',
'SEQ_LEN': 64,
'TASK': 'Q2AR',
'TEST_ANNOTATION_FILE': '',
'TEST_IMAGE_SET': 'val',
'TRAIN_ANNOTATION_FILE': '',
'TRAIN_IMAGE_SET': 'train',
'VAL_ANNOTATION_FILE': '',
'VAL_IMAGE_SET': 'val',
'ZIP_MODE': False},
'GPUS': '0',
'LOG_FREQUENT': 100,
'MODEL_PREFIX': 'vl-bert_base_res101_pretrain',
'MODULE': 'ResNetVLBERTForPretraining',
'NETWORK': {'ANS_LOSS_WEIGHT': 1.0,
'BERT_ALIGN_ANSWER': True,
'BERT_ALIGN_QUESTION': True,
'BERT_FROZEN': False,
'BERT_MODEL_NAME': './model/pretrained_model/bert-base-uncased',
'BERT_PRETRAINED': '',
'BERT_PRETRAINED_EPOCH': 0,
'BERT_USE_LAYER': -2,
'BERT_WITH_MLM_LOSS': False,
'BERT_WITH_NSP_LOSS': False,
'BLIND': False,
'CLASSIFIER_DROPOUT': 0.1,
'CLASSIFIER_HIDDEN_SIZE': 1024,
'CLASSIFIER_SIGMOID': False,
'CLASSIFIER_SIGMOID_LOSS_POSITIVE_WEIGHT': 1.0,
'CLASSIFIER_TYPE': '2fc',
'CNN_LOSS_WEIGHT': 1.0,
'ENABLE_CNN_REG_LOSS': False,
'FOR_MASK_VL_MODELING_PRETRAIN': False,
'IMAGE_C5_DILATED': True,
'IMAGE_FEAT_PRECOMPUTED': False,
'IMAGE_FINAL_DIM': 768,
'IMAGE_FROZEN_BACKBONE_STAGES': [1, 2],
'IMAGE_FROZEN_BN': True,
'IMAGE_NUM_LAYERS': 101,
'IMAGE_PRETRAINED': '',
'IMAGE_PRETRAINED_EPOCH': 0,
'IMAGE_SEMANTIC': False,
'IMAGE_STRIDE_IN_1x1': True,
'MASK_RAW_PIXELS': True,
'MLM_LOSS_NORM_IN_BATCH_FIRST': False,
'MVRC_LOSS_NORM_IN_BATCH_FIRST': False,
'NO_GROUNDING': False,
'NO_OBJ_ATTENTION': False,
'OUTPUT_CONV5': False,
'PARTIAL_PRETRAIN': '/proj/vondrick/amogh/commonsense/VL-BERT/model/pretrained_model/vl-bert-base-e2e-backup.model',
'PARTIAL_PRETRAIN_PREFIX_CHANGES': [],
'PIXEL_MEANS': [102.9801, 115.9465, 122.7717],
'PIXEL_STDS': [1.0, 1.0, 1.0],
'VLBERT': {'attention_probs_dropout_prob': 0.1,
'from_scratch': False,
'hidden_act': 'gelu',
'hidden_dropout_prob': 0.1,
'hidden_size': 768,
'initializer_range': 0.02,
'input_size': 1280,
'input_transform_type': 1,
'intermediate_size': 3072,
'max_position_embeddings': 512,
'num_attention_heads': 12,
'num_hidden_layers': 12,
'obj_pos_id_relative': True,
'object_word_embed_mode': 2,
'pos_embedding_frozen': False,
'position_padding_idx': -1,
'type_vocab_size': 3,
'visual_ln': True,
'visual_region_classes': 81,
'visual_scale_object_init': 0.0,
'visual_scale_text_init': 0.0,
'visual_size': 768,
'vocab_size': 30522,
'with_pooler': False,
'word_embedding_frozen': False},
'WITH_CONT_LOSS': False,
'WITH_MLM_LOSS': True,
'WITH_MVRC_LOSS': False,
'WITH_REL_LOSS': False},
'NUM_WORKERS_PER_GPU': 5,
'OUTPUT_PATH': './checkpoints_debug2/./output/vl-bert/contrastive_random_images',
'RNG_SEED': 12345,
'SCALES': [600, 1000],
'TEST': {'BATCH_IMAGES': 32,
'FLIP_PROB': 0,
'SHUFFLE': False,
'TEST_EPOCH': 0},
'TRAIN': {'ASPECT_GROUPING': False,
'AUTO_RESUME': True,
'BATCH_IMAGES': 2,
'BEGIN_EPOCH': 0,
'CLIP_GRAD_NORM': 10,
'END_EPOCH': 10,
'FLIP_PROB': 0.5,
'FP16': False,
'FP16_LOSS_SCALE': 128.0,
'GRAD_ACCUMULATE_STEPS': 1,
'LOSS_LOGGERS': [('mlm_loss', 'MLMLossWVC'),
('mvrc_loss', 'MVRCLoss')],
'LR': 1e-07,
'LR_FACTOR': 0.1,
'LR_MULT': [],
'LR_SCHEDULE': 'triangle',
'LR_STEP': [],
'MOMENTUM': 0.9,
'OPTIMIZER': 'AdamW',
'RESUME': False,
'SHUFFLE': True,
'WARMUP': True,
'WARMUP_FACTOR': 0.0,
'WARMUP_METHOD': 'linear',
'WARMUP_STEPS': 8000,
'WD': 0.0001},
'VAL': {'BATCH_IMAGES': 32, 'FLIP_PROB': 0, 'SHUFFLE': False},
'VAL_FREQUENT': 1}
Warnings: Unexpected keys: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias'].
NCCL version 2.7.8+cuda10.2
native distributed, size: 1, rank: 0, local rank: 0
>> Trainable Parameters:
------------------------------------------------------------------------------------------------------------------------------------
|Name |Dtype |Shape |#Params |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.conv1.weight |torch.float32 |(128, 256, 1, 1) |32768 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.0.downsample.0.weight |torch.float32 |(512, 256, 1, 1) |131072 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.1.conv1.weight |torch.float32 |(128, 512, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.1.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.1.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.2.conv1.weight |torch.float32 |(128, 512, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.2.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.2.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.3.conv1.weight |torch.float32 |(128, 512, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.3.conv2.weight |torch.float32 |(128, 128, 3, 3) |147456 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer2.3.conv3.weight |torch.float32 |(512, 128, 1, 1) |65536 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.conv1.weight |torch.float32 |(256, 512, 1, 1) |131072 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.0.downsample.0.weight |torch.float32 |(1024, 512, 1, 1) |524288 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.1.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.1.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.1.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.2.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.2.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.2.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.3.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.3.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.3.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.4.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.4.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.4.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.5.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.5.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.5.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.6.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.6.conv2.weight |torch.float32 |(256, 256, 3, 3) |589824 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.6.conv3.weight |torch.float32 |(1024, 256, 1, 1) |262144 |
------------------------------------------------------------------------------------------------------------------------------------
|image_feature_extractor.backbone.layer3.7.conv1.weight |torch.float32 |(256, 1024, 1, 1) |262