@Atry
Last active June 8, 2024 01:06
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[4], line 92
90 with torch.no_grad():
91 deepspeed_hybrid_engine.eval()
---> 92 print(deepspeed_hybrid_engine.generate(
93 torch.tensor([[1]], dtype=torch.int, device=deepspeed_hybrid_engine.device),
94 synced_gpus=True,
95 generation_config=GenerationConfig(max_new_tokens=20),
96 ))
File ~/peftai/.venv/lib/python3.11/site-packages/deepspeed/runtime/hybrid_engine.py:254, in DeepSpeedHybridEngine.generate(self, *inputs, **kwargs)
251 self.fuse_lora_weight()
253 self.retake_inference_cache()
--> 254 generate_ret_vals = self._generate(*inputs, **kwargs)
256 if len(self.all_lora_params) > 0:
257 self.unfuse_lora_weight()
File ~/peftai/.venv/lib/python3.11/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
File ~/peftai/.venv/lib/python3.11/site-packages/transformers/generation/utils.py:1576, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1559 result = self._assisted_decoding(
1560 input_ids,
1561 candidate_generator=candidate_generator,
(...)
1572 **model_kwargs,
1573 )
1574 if generation_mode == GenerationMode.GREEDY_SEARCH:
1575 # 11. run greedy search
-> 1576 result = self._greedy_search(
1577 input_ids,
1578 logits_processor=prepared_logits_processor,
1579 stopping_criteria=prepared_stopping_criteria,
1580 pad_token_id=generation_config.pad_token_id,
1581 output_scores=generation_config.output_scores,
1582 output_logits=generation_config.output_logits,
1583 return_dict_in_generate=generation_config.return_dict_in_generate,
1584 synced_gpus=synced_gpus,
1585 streamer=streamer,
1586 **model_kwargs,
1587 )
1589 elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
1590 if not model_kwargs["use_cache"]:
File ~/peftai/.venv/lib/python3.11/site-packages/transformers/generation/utils.py:2494, in GenerationMixin._greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, output_logits, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2491 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
2493 # forward pass to get next token
-> 2494 outputs = self(
2495 **model_inputs,
2496 return_dict=True,
2497 output_attentions=output_attentions,
2498 output_hidden_states=output_hidden_states,
2499 )
2501 if synced_gpus and this_peer_finished:
2502 continue # don't waste resources running the code we don't need
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1531 else:
-> 1532 return self._call_impl(*args, **kwargs)
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1582, in Module._call_impl(self, *args, **kwargs)
1579 bw_hook = hooks.BackwardHook(self, full_backward_hooks, backward_pre_hooks)
1580 args = bw_hook.setup_input_hook(args)
-> 1582 result = forward_call(*args, **kwargs)
1583 if _global_forward_hooks or self._forward_hooks:
1584 for hook_id, hook in (
1585 *_global_forward_hooks.items(),
1586 *self._forward_hooks.items(),
1587 ):
1588 # mark that always called hook is run
File ~/peftai/.venv/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1208, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)
1205 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1207 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
-> 1208 outputs = self.model(
1209 input_ids=input_ids,
1210 attention_mask=attention_mask,
1211 position_ids=position_ids,
1212 past_key_values=past_key_values,
1213 inputs_embeds=inputs_embeds,
1214 use_cache=use_cache,
1215 output_attentions=output_attentions,
1216 output_hidden_states=output_hidden_states,
1217 return_dict=return_dict,
1218 cache_position=cache_position,
1219 )
1221 hidden_states = outputs[0]
1222 if self.config.pretraining_tp > 1:
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1531 else:
-> 1532 return self._call_impl(*args, **kwargs)
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1582, in Module._call_impl(self, *args, **kwargs)
1579 bw_hook = hooks.BackwardHook(self, full_backward_hooks, backward_pre_hooks)
1580 args = bw_hook.setup_input_hook(args)
-> 1582 result = forward_call(*args, **kwargs)
1583 if _global_forward_hooks or self._forward_hooks:
1584 for hook_id, hook in (
1585 *_global_forward_hooks.items(),
1586 *self._forward_hooks.items(),
1587 ):
1588 # mark that always called hook is run
File ~/peftai/.venv/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1018, in LlamaModel.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)
1007 layer_outputs = self._gradient_checkpointing_func(
1008 decoder_layer.__call__,
1009 hidden_states,
(...)
1015 cache_position,
1016 )
1017 else:
-> 1018 layer_outputs = decoder_layer(
1019 hidden_states,
1020 attention_mask=causal_mask,
1021 position_ids=position_ids,
1022 past_key_value=past_key_values,
1023 output_attentions=output_attentions,
1024 use_cache=use_cache,
1025 cache_position=cache_position,
1026 )
1028 hidden_states = layer_outputs[0]
1030 if use_cache:
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1531 else:
-> 1532 return self._call_impl(*args, **kwargs)
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1582, in Module._call_impl(self, *args, **kwargs)
1579 bw_hook = hooks.BackwardHook(self, full_backward_hooks, backward_pre_hooks)
1580 args = bw_hook.setup_input_hook(args)
-> 1582 result = forward_call(*args, **kwargs)
1583 if _global_forward_hooks or self._forward_hooks:
1584 for hook_id, hook in (
1585 *_global_forward_hooks.items(),
1586 *self._forward_hooks.items(),
1587 ):
1588 # mark that always called hook is run
File ~/peftai/.venv/lib/python3.11/site-packages/deepspeed/model_implementations/transformers/ds_transformer.py:171, in DeepSpeedTransformerInference.forward(self, input, input_mask, attention_mask, attn_mask, head_mask, layer_past, get_key_value, get_present, encoder_output, enc_dec_attn_mask, x, encoder_hidden_states, encoder_attention_mask, use_cache, alibi, output_attentions, layer_head_mask, past_key_value, **kwargs)
167 input = input.to(target_dtype)
169 with torch.no_grad():
170 attention_output, key, value, context_outputtn_ctx, inp_norm = \
--> 171 self.attention(input,
172 input_mask,
173 head_mask,
174 layer_past,
175 get_present,
176 encoder_hidden_states,
177 encoder_attention_mask,
178 output_attentions,
179 self.norm_w,
180 self.norm_b,
181 alibi)
183 presents = (key, value)
184 self.layer_past = presents if layer_past is None else None
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1531 else:
-> 1532 return self._call_impl(*args, **kwargs)
File ~/peftai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
1536 # If we don't have any hooks, we want to skip the rest of the logic in
1537 # this function, and just call forward.
1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1539 or _global_backward_pre_hooks or _global_backward_hooks
1540 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541 return forward_call(*args, **kwargs)
1543 try:
1544 result = None
File ~/peftai/.venv/lib/python3.11/site-packages/deepspeed/ops/transformer/inference/ds_attention.py:141, in DeepSpeedSelfAttention.forward(self, input, input_mask, head_mask, layer_past, get_present, encoder_hidden_states, encoder_attention_mask, output_attentions, norm_w, norm_b, alibi)
128 def forward(self,
129 input,
130 input_mask,
(...)
138 norm_b=None,
139 alibi=None):
140 if self.attn_qkvw is None:
--> 141 self._attn_qkvw, self._attn_qkvb = self._merge_qkv()
142 else:
143 self._attn_qkvw = self.attn_qkvw
File ~/peftai/.venv/lib/python3.11/site-packages/deepspeed/ops/transformer/inference/ds_attention.py:118, in DeepSpeedSelfAttention._merge_qkv(self)
116 def _merge_qkv(self):
117 qvkw = DeepSpeedSelfAttention._qkv_buffers[0]
--> 118 qvkw[:self.hidden_size_per_partition, :] = self.attn_qw # type: ignore
119 qvkw[self.hidden_size_per_partition:2 * self.hidden_size_per_partition, :] = self.attn_kw # type: ignore
120 qvkw[2 * self.hidden_size_per_partition:, :] = self.attn_vw # type: ignore
RuntimeError: The expanded size of the tensor (2048) must match the existing size (1179648) at non-singleton dimension 1. Target sizes: [2048, 2048]. Tensor sizes: [1179648]
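For context, a minimal sketch of the setup assumed to produce the traceback above. Only the generate call (Cell In[4], lines 90-96) is taken from the traceback itself; the checkpoint name ("MODEL_NAME"), the exact DeepSpeed config, and the deepspeed.initialize wiring are assumptions filled in for illustration:

import torch
import deepspeed
from transformers import AutoModelForCausalLM, GenerationConfig

# Assumption: some LLaMA-family checkpoint; the traceback only shows LlamaForCausalLM.
model = AutoModelForCausalLM.from_pretrained("MODEL_NAME")  # hypothetical placeholder

# Assumption: hybrid engine enabled in the DeepSpeed config; the engine class
# DeepSpeedHybridEngine in the traceback implies hybrid_engine.enabled = true.
ds_config = {
    "train_micro_batch_size_per_gpu": 1,
    "bf16": {"enabled": True},
    "zero_optimization": {"stage": 3},
    "hybrid_engine": {"enabled": True},
}

deepspeed_hybrid_engine, *_ = deepspeed.initialize(model=model, config=ds_config)

# Verbatim from Cell In[4] in the traceback:
with torch.no_grad():
    deepspeed_hybrid_engine.eval()
    print(deepspeed_hybrid_engine.generate(
        torch.tensor([[1]], dtype=torch.int, device=deepspeed_hybrid_engine.device),
        synced_gpus=True,
        generation_config=GenerationConfig(max_new_tokens=20),
    ))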
Using quantizer for weights: CUDAQuantizer
[2024-06-08 00:54:02,799] [INFO] [partition_parameters.py:562:patch_init_and_builtins] Enable Zero3 engine with INT4 quantization.
[2024-06-08 00:54:03,256] [INFO] [partition_parameters.py:345:__exit__] finished initializing model - num_params = 603, num_elems = 3.30B
[2024-06-08 00:54:08,102] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
[2024-06-08 00:54:08,103] [INFO] [logging.py:96:log_dist] [Rank 0] Creating ZeRO Offload
[2024-06-08 00:54:08,290] [INFO] [utils.py:779:see_memory_usage] DeepSpeedZeRoOffload initialize [begin]
[2024-06-08 00:54:08,291] [INFO] [utils.py:780:see_memory_usage] MA 1.84 GB Max_MA 2.21 GB CA 2.38 GB Max_CA 2 GB
[2024-06-08 00:54:08,292] [INFO] [utils.py:787:see_memory_usage] CPU Virtual Memory: used = 7.96 GB, percent = 25.4%
Parameter Offload: Total persistent parameters: 92160 in 45 params
[2024-06-08 00:54:08,478] [INFO] [utils.py:779:see_memory_usage] DeepSpeedZeRoOffload initialize [end]
[2024-06-08 00:54:08,479] [INFO] [utils.py:780:see_memory_usage] MA 1.84 GB Max_MA 1.84 GB CA 2.38 GB Max_CA 2 GB
[2024-06-08 00:54:08,480] [INFO] [utils.py:787:see_memory_usage] CPU Virtual Memory: used = 7.96 GB, percent = 25.4%
[2024-06-08 00:54:08,481] [INFO] [config.py:996:print] DeepSpeedEngine configuration:
[2024-06-08 00:54:08,482] [INFO] [config.py:1000:print] activation_checkpointing_config {
"partition_activations": false,
"contiguous_memory_optimization": false,
"cpu_checkpointing": false,
"number_checkpoints": null,
"synchronize_checkpoint_boundary": false,
"profile": false
}
[2024-06-08 00:54:08,482] [INFO] [config.py:1000:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True}
[2024-06-08 00:54:08,483] [INFO] [config.py:1000:print] amp_enabled .................. False
[2024-06-08 00:54:08,484] [INFO] [config.py:1000:print] amp_params ................... False
[2024-06-08 00:54:08,485] [INFO] [config.py:1000:print] autotuning_config ............ {
"enabled": false,
"start_step": null,
"end_step": null,
"metric_path": null,
"arg_mappings": null,
"metric": "throughput",
"model_info": null,
"results_dir": "autotuning_results",
"exps_dir": "autotuning_exps",
"overwrite": true,
"fast": true,
"start_profile_step": 3,
"end_profile_step": 5,
"tuner_type": "gridsearch",
"tuner_early_stopping": 5,
"tuner_num_trials": 50,
"model_info_path": null,
"mp_size": 1,
"max_train_batch_size": null,
"min_train_batch_size": 1,
"max_train_micro_batch_size_per_gpu": 1.024000e+03,
"min_train_micro_batch_size_per_gpu": 1,
"num_tuning_micro_batch_sizes": 3
}
[2024-06-08 00:54:08,485] [INFO] [config.py:1000:print] bfloat16_enabled ............. True
[2024-06-08 00:54:08,485] [INFO] [config.py:1000:print] bfloat16_immediate_grad_update False
[2024-06-08 00:54:08,486] [INFO] [config.py:1000:print] checkpoint_parallel_write_pipeline False
[2024-06-08 00:54:08,486] [INFO] [config.py:1000:print] checkpoint_tag_validation_enabled True
[2024-06-08 00:54:08,487] [INFO] [config.py:1000:print] checkpoint_tag_validation_fail False
[2024-06-08 00:54:08,487] [INFO] [config.py:1000:print] comms_config ................. <deepspeed.comm.config.DeepSpeedCommsConfig object at 0x7f9f266ea990>
[2024-06-08 00:54:08,488] [INFO] [config.py:1000:print] communication_data_type ...... None
[2024-06-08 00:54:08,488] [INFO] [config.py:1000:print] compile_config ............... enabled=False backend='inductor' kwargs={}
[2024-06-08 00:54:08,489] [INFO] [config.py:1000:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
[2024-06-08 00:54:08,489] [INFO] [config.py:1000:print] curriculum_enabled_legacy .... False
[2024-06-08 00:54:08,490] [INFO] [config.py:1000:print] curriculum_params_legacy ..... False
[2024-06-08 00:54:08,490] [INFO] [config.py:1000:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
[2024-06-08 00:54:08,491] [INFO] [config.py:1000:print] data_efficiency_enabled ...... False
[2024-06-08 00:54:08,491] [INFO] [config.py:1000:print] dataloader_drop_last ......... False
[2024-06-08 00:54:08,491] [INFO] [config.py:1000:print] disable_allgather ............ False
[2024-06-08 00:54:08,492] [INFO] [config.py:1000:print] dump_state ................... False
[2024-06-08 00:54:08,492] [INFO] [config.py:1000:print] dynamic_loss_scale_args ...... None
[2024-06-08 00:54:08,493] [INFO] [config.py:1000:print] eigenvalue_enabled ........... False
[2024-06-08 00:54:08,493] [INFO] [config.py:1000:print] eigenvalue_gas_boundary_resolution 1
[2024-06-08 00:54:08,494] [INFO] [config.py:1000:print] eigenvalue_layer_name ........ bert.encoder.layer
[2024-06-08 00:54:08,494] [INFO] [config.py:1000:print] eigenvalue_layer_num ......... 0
[2024-06-08 00:54:08,495] [INFO] [config.py:1000:print] eigenvalue_max_iter .......... 100
[2024-06-08 00:54:08,495] [INFO] [config.py:1000:print] eigenvalue_stability ......... 1e-06
[2024-06-08 00:54:08,496] [INFO] [config.py:1000:print] eigenvalue_tol ............... 0.01
[2024-06-08 00:54:08,496] [INFO] [config.py:1000:print] eigenvalue_verbose ........... False
[2024-06-08 00:54:08,496] [INFO] [config.py:1000:print] elasticity_enabled ........... False
[2024-06-08 00:54:08,497] [INFO] [config.py:1000:print] flops_profiler_config ........ {
"enabled": false,
"recompute_fwd_factor": 0.0,
"profile_step": 1,
"module_depth": -1,
"top_modules": 1,
"detailed": true,
"output_file": null
}
[2024-06-08 00:54:08,497] [INFO] [config.py:1000:print] fp16_auto_cast ............... None
[2024-06-08 00:54:08,498] [INFO] [config.py:1000:print] fp16_enabled ................. False
[2024-06-08 00:54:08,498] [INFO] [config.py:1000:print] fp16_master_weights_and_gradients False
[2024-06-08 00:54:08,499] [INFO] [config.py:1000:print] global_rank .................. 0
[2024-06-08 00:54:08,499] [INFO] [config.py:1000:print] grad_accum_dtype ............. None
[2024-06-08 00:54:08,500] [INFO] [config.py:1000:print] gradient_accumulation_steps .. 1
[2024-06-08 00:54:08,500] [INFO] [config.py:1000:print] gradient_clipping ............ 0.0
[2024-06-08 00:54:08,501] [INFO] [config.py:1000:print] gradient_predivide_factor .... 1.0
[2024-06-08 00:54:08,501] [INFO] [config.py:1000:print] graph_harvesting ............. False
[2024-06-08 00:54:08,504] [INFO] [config.py:1000:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
[2024-06-08 00:54:08,504] [INFO] [config.py:1000:print] initial_dynamic_scale ........ 1
[2024-06-08 00:54:08,505] [INFO] [config.py:1000:print] load_universal_checkpoint .... False
[2024-06-08 00:54:08,508] [INFO] [config.py:1000:print] loss_scale ................... 1.0
[2024-06-08 00:54:08,508] [INFO] [config.py:1000:print] memory_breakdown ............. False
[2024-06-08 00:54:08,509] [INFO] [config.py:1000:print] mics_hierarchial_params_gather False
[2024-06-08 00:54:08,509] [INFO] [config.py:1000:print] mics_shard_size .............. -1
[2024-06-08 00:54:08,510] [INFO] [config.py:1000:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') enabled=False
[2024-06-08 00:54:08,510] [INFO] [config.py:1000:print] nebula_config ................ {
"enabled": false,
"persistent_storage_path": null,
"persistent_time_interval": 100,
"num_of_version_in_retention": 2,
"enable_nebula_load": true,
"load_path": null
}
[2024-06-08 00:54:08,511] [INFO] [config.py:1000:print] optimizer_legacy_fusion ...... False
[2024-06-08 00:54:08,511] [INFO] [config.py:1000:print] optimizer_name ............... None
[2024-06-08 00:54:08,512] [INFO] [config.py:1000:print] optimizer_params ............. None
[2024-06-08 00:54:08,512] [INFO] [config.py:1000:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0, 'pipe_partitioned': True, 'grad_partitioned': True}
[2024-06-08 00:54:08,512] [INFO] [config.py:1000:print] pld_enabled .................. False
[2024-06-08 00:54:08,513] [INFO] [config.py:1000:print] pld_params ................... False
[2024-06-08 00:54:08,513] [INFO] [config.py:1000:print] prescale_gradients ........... False
[2024-06-08 00:54:08,514] [INFO] [config.py:1000:print] scheduler_name ............... None
[2024-06-08 00:54:08,514] [INFO] [config.py:1000:print] scheduler_params ............. None
[2024-06-08 00:54:08,515] [INFO] [config.py:1000:print] seq_parallel_communication_data_type torch.float32
[2024-06-08 00:54:08,515] [INFO] [config.py:1000:print] sparse_attention ............. None
[2024-06-08 00:54:08,515] [INFO] [config.py:1000:print] sparse_gradients_enabled ..... False
[2024-06-08 00:54:08,516] [INFO] [config.py:1000:print] steps_per_print .............. 10
[2024-06-08 00:54:08,516] [INFO] [config.py:1000:print] train_batch_size ............. 1
[2024-06-08 00:54:08,517] [INFO] [config.py:1000:print] train_micro_batch_size_per_gpu 1
[2024-06-08 00:54:08,517] [INFO] [config.py:1000:print] use_data_before_expert_parallel_ False
[2024-06-08 00:54:08,517] [INFO] [config.py:1000:print] use_node_local_storage ....... False
[2024-06-08 00:54:08,519] [INFO] [config.py:1000:print] wall_clock_breakdown ......... False
[2024-06-08 00:54:08,520] [INFO] [config.py:1000:print] weight_quantization_config ... q_type='symmetric' q_groups=1 enabled=True num_bits=8 quantized_initialization={'num_bits': 4, 'group_size': 64, 'group_dim': 1, 'symmetric': False} post_init_quant={}
[2024-06-08 00:54:08,520] [INFO] [config.py:1000:print] world_size ................... 1
[2024-06-08 00:54:08,521] [INFO] [config.py:1000:print] zero_allow_untested_optimizer False
[2024-06-08 00:54:08,521] [INFO] [config.py:1000:print] zero_config .................. stage=3 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500,000,000 use_multi_rank_bucket_allreduce=True allgather_partitions=True allgather_bucket_size=500,000,000 overlap_comm=True load_from_fp32_weights=False elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False zero_hpz_partition_size=1 zero_quantized_weights=True zero_quantized_nontrainable_weights=True zero_quantized_gradients=False mics_shard_size=-1 mics_hierarchical_params_gather=False memory_efficient_linear=True pipeline_loading_checkpoint=False override_module_apply=True
[2024-06-08 00:54:08,522] [INFO] [config.py:1000:print] zero_enabled ................. True
[2024-06-08 00:54:08,522] [INFO] [config.py:1000:print] zero_force_ds_cpu_optimizer .. True
[2024-06-08 00:54:08,522] [INFO] [config.py:1000:print] zero_optimization_stage ...... 3
[2024-06-08 00:54:08,523] [INFO] [config.py:986:print_user_config] json = {
"zero_optimization": {
"load_from_fp32_weights": false,
"stage": 3,
"zero_quantized_weights": true,
"zero_quantized_nontrainable_weights": true
},
"train_micro_batch_size_per_gpu": 1,
"bf16": {
"enabled": true
},
"weight_quantization": {
"quantized_initialization": {
"num_bits": 4,
"group_size": 64,
"group_dim": 1,
"symmetric": false
}
}
}
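The JSON above, as a Python dict, is presumably what drives both the INT4 quantized initialization ("Enable Zero3 engine with INT4 quantization") and the ZeRO-3 engine construction logged earlier. A minimal sketch of one way this config could be wired up; the deepspeed.zero.Init context, the from_config call, and the "MODEL_NAME" placeholder are assumptions, since the actual cell code is not shown in the log:

import deepspeed
from transformers import AutoConfig, AutoModelForCausalLM

ds_config = {
    "zero_optimization": {
        "load_from_fp32_weights": False,
        "stage": 3,
        "zero_quantized_weights": True,
        "zero_quantized_nontrainable_weights": True,
    },
    "train_micro_batch_size_per_gpu": 1,
    "bf16": {"enabled": True},
    "weight_quantization": {
        "quantized_initialization": {
            "num_bits": 4,
            "group_size": 64,
            "group_dim": 1,
            "symmetric": False,
        }
    },
}

# Partition parameters as they are created and quantize them to INT4
# (corresponds to the "Enable Zero3 engine with INT4 quantization" and
# "finished initializing model" lines above).
model_config = AutoConfig.from_pretrained("MODEL_NAME")  # hypothetical placeholder
with deepspeed.zero.Init(config_dict_or_path=ds_config):
    model = AutoModelForCausalLM.from_config(model_config)

# Build the ZeRO stage-3 engine with the same config
# (corresponds to the "DeepSpeedEngine configuration" dump above).
engine, *_ = deepspeed.initialize(model=model, config=ds_config)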