Skip to content

Instantly share code, notes, and snippets.

@qianwch
Created December 30, 2023 05:48
Show Gist options
  • Save qianwch/ed3446d0b6f6dea9044e4b492e498fca to your computer and use it in GitHub Desktop.
Save qianwch/ed3446d0b6f6dea9044e4b492e498fca to your computer and use it in GitHub Desktop.
Mindformers-qwen-log
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
setattr(self, word, getattr(machar, word).flat[0])
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
setattr(self, word, getattr(machar, word).flat[0])
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/jieba/_compat.py:18: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html
import pkg_resources
[WARNING] Distributed Communication has not been inited. Use default RANK_SIZE: 1
[WARNING] Distributed Communication has not been inited. Use default RANK_SIZE: 1
[WARNING] Distributed Communication has not been inited. Use default RANK_ID: 0
[WARNING] Distributed Communication has not been inited. Use default RANK_ID: 0
[WARNING] HCCL_ADPT(87237,ffffb3047930,python):2023-12-30-00:14:29.436.847 [mindspore/ccsrc/plugin/device/ascend/hal/hccl_adapter/hccl_adapter.cc:63] GenHcclOptions] The environment variable DEPLOY_MODE is not set. Now set to default value 0
2023-12-30 00:14:29,672 - mindformers[mindformers/tools/utils.py:153] - INFO - set output path to './output'
2023-12-30 00:14:29,673 - mindformers[mindformers/trainer/trainer.py:176] - INFO - set output_dir from args:dict
2023-12-30 00:14:29,701 - mindformers[mindformers/trainer/base_trainer.py:85] - INFO - Now Running Task is: text_generation, Model is: qwen_14b
2023-12-30 00:14:29,701 - mindformers[mindformers/trainer/base_trainer.py:126] - WARNING - Input model name is not in the supported list or unspecified.
2023-12-30 00:14:29,702 - mindformers[mindformers/trainer/base_trainer.py:127] - WARNING - See the list of supported task and model name: OrderedDict([('general', OrderedDict([('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/general/run_general_task.yaml')])), ('masked_image_modeling', OrderedDict([('mae_vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/mae/run_mae_vit_base_p16_224_800ep.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/mae/run_mae_vit_base_p16_224_800ep.yaml')])), ('image_classification', OrderedDict([('vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml'), ('swin_base_p4w7', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/swin/run_swin_base_p4w7_224_100ep.yaml'), ('mindspore/vit_base_p16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml'), ('mindspore/swin_base_p4w7', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/swin/run_swin_base_p4w7_224_100ep.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/vit/run_vit_base_p16_224_100ep.yaml')])), ('fill_mask', OrderedDict([('bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_base_uncased.yaml'), ('bert_tiny_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_tiny_uncased.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bert/run_bert_tiny_uncased.yaml')])), ('contrastive_language_image_pretrain', OrderedDict([('clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml'), ('blip2_stage1_vit_g', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_qformer_pretrain.yaml'), ('blip2_stage2_vit_g_baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_baichuan_7b.yaml'), ('blip2_stage2_vit_g_llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_llama_7b.yaml'), ('mindspore/clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml'), ('clip_vit_b_16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_16_pretrain_flickr8k.yaml'), ('clip_vit_l_14', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14_pretrain_flickr8k.yaml'), ('clip_vit_l_14@336', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14@336_pretrain_flickr8k.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_pretrain_flickr8k.yaml')])), ('image_to_text_retrieval', OrderedDict([('blip2_stage1_evaluator', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_retrieval_flickr30k.yaml')])), ('zero_shot_image_classification', OrderedDict([('clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml'), ('mindspore/clip_vit_b_32', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_b_16', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_16_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_l_14', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14_zero_shot_image_classification_cifar100.yaml'), ('clip_vit_l_14@336', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_l_14@336_zero_shot_image_classification_cifar100.yaml'), ('blip2_stage1_classification', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage1_vit_g_zero_shot_image_classification_cifar100.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_clip_vit_b_32_zero_shot_image_classification_cifar100.yaml')])), ('image_to_text_generation', OrderedDict([('itt_blip2_stage2_vit_g_baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_baichuan_7b_image_to_text_generation.yaml'), ('itt_blip2_stage2_vit_g_llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/blip2/run_blip2_stage2_vit_g_llama_7b_image_to_text_generation.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/clip/run_blip2_stage2_vit_g_llama_7b_image_to_text_generation.yaml')])), ('translation', OrderedDict([('t5_small', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_small_on_wmt16.yaml'), ('t5_tiny', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_tiny_on_wmt16.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/t5/run_t5_small_on_wmt16.yaml')])), ('text_classification', OrderedDict([('txtcls_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased.yaml'), ('txtcls_bert_base_uncased_mnli', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml'), ('mindspore/txtcls_bert_base_uncased_mnli', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased_mnli.yaml'), ('gpt2_txtcls', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_txtcls.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/txtcls/run_txtcls_bert_base_uncased.yaml')])), ('token_classification', OrderedDict([('tokcls_bert_base_chinese', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese.yaml'), ('tokcls_bert_base_chinese_cluener', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese_cluener.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/tokcls/run_tokcls_bert_base_chinese.yaml')])), ('question_answering', OrderedDict([('qa_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('qa_bert_base_uncased_squad', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('mindspore/qa_bert_base_uncased', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/qa/run_qa_bert_base_uncased.yaml')])), ('text_generation', OrderedDict([('gpt2', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml'), ('gpt2_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_lora.yaml'), ('gpt2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_13b.yaml'), ('gpt2_52b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_52b.yaml'), ('gpt2_xl', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_xl.yaml'), ('gpt2_xl_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2_xl_lora.yaml'), ('llama_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_7b.yaml'), ('llama_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_13b.yaml'), ('llama_65b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_65b.yaml'), ('llama2_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_7b.yaml'), ('llama2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_13b.yaml'), ('llama2_70b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama2/run_llama2_70b.yaml'), ('codellama_34b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/codellama/run_codellama_34b_910b.yaml'), ('llama_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/llama/run_llama_7b_lora.yaml'), ('pangualpha_2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/pangualpha/run_pangualpha_2_6b.yaml'), ('pangualpha_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/pangualpha/run_pangualpha_13b.yaml'), ('glm_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_finetune.yaml'), ('glm_6b_chat', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_infer.yaml'), ('glm_6b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_lora.yaml'), ('glm_6b_lora_chat', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm/run_glm_6b_lora_infer.yaml'), ('glm2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b.yaml'), ('glm2_6b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b_lora.yaml'), ('glm2_6b_ptuning2', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm2/run_glm2_6b_ptuning2.yaml'), ('glm3_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/glm3/run_glm3_6b.yaml'), ('codegeex2_6b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/codegeex2/run_codegeex2_6b.yaml'), ('bloom_560m', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_560m.yaml'), ('bloom_7.1b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_7.1b.yaml'), ('bloom_65b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_65b.yaml'), ('bloom_176b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/bloom/run_bloom_176b.yaml'), ('baichuan_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan/run_baichuan_7b.yaml'), ('baichuan2_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan2/run_baichuan2_7b.yaml'), ('baichuan2_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/baichuan2/run_baichuan2_13b.yaml'), ('ziya_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/ziya/run_ziya_13b.yaml'), ('skywork_13b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/skywork/run_skywork_13b.yaml'), ('internlm_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/internlm/run_internlm_7b.yaml'), ('internlm_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/internlm/run_internlm_7b_lora.yaml'), ('qwen_7b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/qwen/run_qwen_7b.yaml'), ('qwen_7b_lora', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/research/qwen/run_qwen_7b_lora.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml')])), ('segment_anything', OrderedDict([('sam_vit_b', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-b.yaml'), ('sam_vit_l', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-l.yaml'), ('sam_vit_h', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-h.yaml'), ('common', '/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/sam/run_sam_vit-h.yaml')]))])
2023-12-30 00:14:29,702 - mindformers[mindformers/trainer/base_trainer.py:128] - WARNING - The default model config: /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/configs/gpt2/run_gpt2.yaml will now be used for the text_generation task
2023-12-30 00:14:29,703 - mindformers[mindformers/core/parallel_config.py:51] - INFO - initial parallel_config from dict: {'data_parallel': 1, 'model_parallel': 4, 'pipeline_stage': 1, 'use_seq_parallel': False, 'micro_batch_num': 4, 'vocab_emb_dp': True, 'gradient_aggregation_group': 4}
2023-12-30 00:14:29,703 - mindformers[mindformers/trainer/base_trainer.py:191] - INFO - The current parallel mode is semi_auto_parallel, full batch is True,so global batch size will be changed: global_batch_size = batch_size * data_parallel * micro_batch_interleave_num * gradient_accumulation_steps = 1 = 1 * 1 * 1 * 1
2023-12-30 00:14:29,703 - mindformers[mindformers/trainer/base_trainer.py:371] - INFO - .........Build Network From Config..........
2023-12-30 00:14:29,704 - mindformers[mindformers/models/llama/llama_config.py:177] - WARNING - Argument `use_past_shard` is deprecated.
2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:60] - INFO - The Cell Reuse compilation acceleration feature is not supported when the environment variable ENABLE_CELL_REUSE is 0 or MindSpore version is earlier than 2.1.0 or stand_alone mode or pipeline_stages <= 1
2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:64] - INFO -
The current ENABLE_CELL_REUSE=0, please set the environment variable as follows:
export ENABLE_CELL_REUSE=1 to enable the Cell Reuse compilation acceleration feature.
2023-12-30 00:14:29,704 - mindformers[mindformers/version_control.py:73] - INFO - The Cell Reuse compilation acceleration feature only works in pipeline parallel mode(pipeline_stage>1).Current pipeline stage=1, the feature is disabled by default.
2023-12-30 00:14:29,710 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:29,712 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:30,357 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:30,362 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:30,364 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:14:30.369.919 [mindspore/common/parameter.py:786] This interface may be deleted in the future.
2023-12-30 00:14:30,372 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:30,373 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,018 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,027 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:31,029 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,037 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,039 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,680 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,684 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:31,686 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,693 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:31,695 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:32,337 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:32,340 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:32,342 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:32,350 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:32,351 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:32,992 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:32,996 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:32,998 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:33,005 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:33,007 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:33,647 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:33,651 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:33,653 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:33,660 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:33,662 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,305 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,309 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:34,311 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,318 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,320 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,961 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,965 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:34,966 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,974 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:34,975 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:35,617 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:35,621 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:35,623 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:35,630 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:35,632 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,271 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,275 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:36,277 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,284 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,286 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,926 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,930 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:36,932 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,940 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:36,941 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:37,582 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:37,585 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:37,587 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:37,595 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:37,596 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,238 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,242 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:38,244 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,252 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,253 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,900 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,904 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:38,906 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,913 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:38,915 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:39,560 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:39,564 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:39,566 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:39,573 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:39,575 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,230 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,234 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:40,236 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,245 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,247 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,892 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,896 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:40,898 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,905 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:40,907 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:41,553 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:41,557 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:41,559 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:41,566 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:41,567 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,209 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,213 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:42,215 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,222 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,223 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,869 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,873 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:42,875 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,882 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:42,884 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:43,529 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:43,533 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:43,535 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:43,542 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:43,544 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,189 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,193 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:44,195 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,202 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,204 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,849 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,853 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:44,855 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,862 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:44,864 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:45,505 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:45,514 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:45,516 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:45,524 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:45,525 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,166 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,170 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:46,172 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,179 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,180 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,825 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,829 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:46,831 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,838 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:46,840 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:47,486 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:47,489 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:47,491 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:47,498 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:47,500 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,145 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,148 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:48,150 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,158 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,159 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,811 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,815 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:48,817 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,824 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:48,826 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:49,466 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:49,470 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:49,472 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:49,479 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:49,481 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,126 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,130 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:50,132 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,141 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,142 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,796 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,800 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:50,802 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,809 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:50,811 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:51,461 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:51,465 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:51,467 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:51,474 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:51,476 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,117 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,121 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:52,123 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,130 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,131 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,777 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,781 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:52,782 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,790 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:52,791 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:53,438 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:53,441 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:53,443 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:53,451 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:53,452 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,094 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,098 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:54,100 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,107 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,109 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,756 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,760 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:54,762 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,769 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:54,771 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:55,420 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:55,424 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:55,425 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:55,433 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:55,434 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:56,077 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:56,081 - mindformers[mindformers/modules/layers.py:554] - WARNING - The user passed the custom defined activation function True. If the user want to enable shard for the activation cell, the user should set the shard for each primitives in the cell.
2023-12-30 00:14:56,083 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:57,063 - mindformers[mindformers/version_control.py:212] - WARNING - Current MindSpore do not support big kernel SiLU and RMSNorm, please upgrade to 2.2.10 or later version.
2023-12-30 00:14:57,078 - mindformers[mindformers/models/base_model.py:117] - INFO - model built, but weights is unloaded, since the config has no checkpoint_name_or_path attribute or checkpoint_name_or_path is None.
2023-12-30 00:14:57,093 - mindformers[mindformers/trainer/base_trainer.py:515] - INFO - Network Parameters: 14167 M.
[WARNING] DEVICE(87237,ffffb3047930,python):2023-12-30-00:14:58.432.866 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_memory_adapter.cc:103] Initialize] Reserved memory size for other components(1073741824) is less than recommend size(2145292800), It may lead to Out Of Memory in HCCL or other components, Please double check context key 'variable_memory_max_size'/'max_device_memory'
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
setattr(self, word, getattr(machar, word).flat[0])
/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
setattr(self, word, getattr(machar, word).flat[0])
[WARNING] PRE_ACT(87237,ffffb3047930,python):2023-12-30-00:16:38.750.638 [mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc:84] IncreaseAllgatherFusionId] Increase the duplicated allgather fusion id
2023-12-30 00:17:49,800 - mindformers[mindformers/trainer/utils.py:596] - INFO - .............Start load checkpoint from checkpoint..................
2023-12-30 00:17:49,801 - mindformers[mindformers/trainer/utils.py:245] - INFO - When distributed loads are sliced weights,load_checkpoint should be a checkpoint directory containing the directory of rank_{0-*},The directory structure is as follows: **checkpoint_root_dir/rank_{0-*}/**.ckpt
2023-12-30 00:19:26,982 - mindformers[mindformers/trainer/utils.py:258] - INFO - Distribute load is success.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:29.643.743 [mindspore/train/serialization.py:172] The type of transformer.wte.embedding_weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.611.345 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.814.382 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:32.865.933 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.210.12 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.116.387 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.206.440 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.302.318 [mindspore/train/serialization.py:172] The type of transformer.layers.0.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:33.534.612 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.565.13 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.558.620 [mindspore/train/serialization.py:172] The type of transformer.layers.0.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:34.906.088 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.112.506 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.210.534 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.304.827 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.397.993 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.508.718 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.604.662 [mindspore/train/serialization.py:172] The type of transformer.layers.1.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:35.886.504 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:36.427.475 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.186.58 [mindspore/train/serialization.py:172] The type of transformer.layers.1.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.385.227 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.598.188 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.687.603 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.802.418 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:37.897.706 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.916. [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.935.86 [mindspore/train/serialization.py:172] The type of transformer.layers.2.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.389.393 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:38.992.544 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:39.532.962 [mindspore/train/serialization.py:172] The type of transformer.layers.2.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:39.876.857 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.685.51 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.159.963 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.280.464 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.384.605 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.416.777 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.508.013 [mindspore/train/serialization.py:172] The type of transformer.layers.3.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.606.761 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:40.960.073 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.315.835 [mindspore/train/serialization.py:172] The type of transformer.layers.3.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.621.338 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.748.162 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.854.197 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.886.329 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:41.978.509 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.107.24 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.103.064 [mindspore/train/serialization.py:172] The type of transformer.layers.4.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.190.918 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.544.984 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:42.941.873 [mindspore/train/serialization.py:172] The type of transformer.layers.4.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.233.123 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.355.428 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.445.002 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.476.827 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.562.141 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.594.368 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.686.504 [mindspore/train/serialization.py:172] The type of transformer.layers.5.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:43.776.271 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.112.113 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.466.775 [mindspore/train/serialization.py:172] The type of transformer.layers.5.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.767.148 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.886.560 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:44.977.884 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.291.9 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.913.29 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.122.321 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.214.277 [mindspore/train/serialization.py:172] The type of transformer.layers.6.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.284.537 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.619.414 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:45.956.637 [mindspore/train/serialization.py:172] The type of transformer.layers.6.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.242.113 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.377.666 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.471.024 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.501.071 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.593.557 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.620.175 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.710.899 [mindspore/train/serialization.py:172] The type of transformer.layers.7.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:46.793.061 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.143.461 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.473.350 [mindspore/train/serialization.py:172] The type of transformer.layers.7.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.628.073 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.753.922 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.846.836 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.875.535 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.967.234 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:47.999.337 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.890.48 [mindspore/train/serialization.py:172] The type of transformer.layers.8.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.165.084 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.681.108 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:48.976.043 [mindspore/train/serialization.py:172] The type of transformer.layers.8.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.317.928 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.479.756 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.553.552 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.626.140 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.686.796 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.766.473 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.827.273 [mindspore/train/serialization.py:172] The type of transformer.layers.9.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:49.909.200 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.279.948 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.646.072 [mindspore/train/serialization.py:172] The type of transformer.layers.9.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:50.943.834 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.680.35 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.162.104 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.198.512 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.302.291 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.336.603 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.426.555 [mindspore/train/serialization.py:172] The type of transformer.layers.10.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.515.263 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:51.876.965 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.235.903 [mindspore/train/serialization.py:172] The type of transformer.layers.10.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.496.045 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.578.253 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.644.899 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.676.079 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.767.318 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.790.577 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.852.412 [mindspore/train/serialization.py:172] The type of transformer.layers.11.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:52.909.901 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.117.267 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.306.410 [mindspore/train/serialization.py:172] The type of transformer.layers.11.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.471.388 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.555.691 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.622.197 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.640.009 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.694.637 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.734.799 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.823.553 [mindspore/train/serialization.py:172] The type of transformer.layers.12.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:53.904.138 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.132.502 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.475.075 [mindspore/train/serialization.py:172] The type of transformer.layers.12.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.837.669 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:54.973.434 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.637.53 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.115.734 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.167.208 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.207.468 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.300.499 [mindspore/train/serialization.py:172] The type of transformer.layers.13.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.382.921 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.592.604 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:55.955.980 [mindspore/train/serialization.py:172] The type of transformer.layers.13.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.304.945 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.463.094 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.524.711 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.564.815 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.660.107 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.692.217 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:56.757.487 [mindspore/train/serialization.py:172] The type of transformer.layers.14.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.474.1 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.364.427 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.663.272 [mindspore/train/serialization.py:172] The type of transformer.layers.14.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:57.919.954 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.964.04 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.163.756 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.203.870 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.293.580 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.331.781 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.423.184 [mindspore/train/serialization.py:172] The type of transformer.layers.15.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.504.562 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:58.795.691 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.387.846 [mindspore/train/serialization.py:172] The type of transformer.layers.15.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.634.479 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.758.031 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.816.496 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.855.170 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:19:59.946.318 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.558.38 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.116.309 [mindspore/train/serialization.py:172] The type of transformer.layers.16.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.210.795 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.513.187 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.726.708 [mindspore/train/serialization.py:172] The type of transformer.layers.16.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:00.961.412 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.112.011 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.177.320 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.222.916 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.312.605 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.349.532 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.412.024 [mindspore/train/serialization.py:172] The type of transformer.layers.17.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.506.050 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.748.346 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:01.973.622 [mindspore/train/serialization.py:172] The type of transformer.layers.17.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.128.179 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.229.669 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.302.768 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.345.515 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.406.901 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.449.507 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.536.945 [mindspore/train/serialization.py:172] The type of transformer.layers.18.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.619.002 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:02.827.554 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.911.18 [mindspore/train/serialization.py:172] The type of transformer.layers.18.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.325.664 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.451.490 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.544.533 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.576.664 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.654.106 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.688.260 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.780.785 [mindspore/train/serialization.py:172] The type of transformer.layers.19.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:03.877.374 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.202.281 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.540.706 [mindspore/train/serialization.py:172] The type of transformer.layers.19.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.753.268 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.850.076 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.911.505 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:04.951.761 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.479.51 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.808.51 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.144.028 [mindspore/train/serialization.py:172] The type of transformer.layers.20.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.227.934 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.507.191 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.741.909 [mindspore/train/serialization.py:172] The type of transformer.layers.20.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:05.913.295 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.151.02 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.110.182 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.143.177 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.207.493 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.240.738 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.310.663 [mindspore/train/serialization.py:172] The type of transformer.layers.21.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.394.765 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.622.853 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:06.852.884 [mindspore/train/serialization.py:172] The type of transformer.layers.21.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.108.13 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.113.861 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.202.209 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.235.192 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.294.596 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.344.243 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.438.072 [mindspore/train/serialization.py:172] The type of transformer.layers.22.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.545.969 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:07.783.346 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.710.9 [mindspore/train/serialization.py:172] The type of transformer.layers.22.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.163.652 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.293.650 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.397.280 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.430.064 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.486.836 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.519.502 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.581.809 [mindspore/train/serialization.py:172] The type of transformer.layers.23.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.693.016 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:08.936.416 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.163.681 [mindspore/train/serialization.py:172] The type of transformer.layers.23.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.320.441 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.449.952 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.543.720 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.588.307 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.683.133 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.715.993 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.778.900 [mindspore/train/serialization.py:172] The type of transformer.layers.24.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:09.891.674 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.117.625 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.356.864 [mindspore/train/serialization.py:172] The type of transformer.layers.24.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.629.171 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.753.828 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.844.225 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.881.703 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:10.968.741 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.408. [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.680.79 [mindspore/train/serialization.py:172] The type of transformer.layers.25.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.154.774 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.497.490 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:11.847.382 [mindspore/train/serialization.py:172] The type of transformer.layers.25.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.138.687 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.269.221 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.360.666 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.400.769 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.493.261 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.528.398 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.621.801 [mindspore/train/serialization.py:172] The type of transformer.layers.26.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:12.710.419 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.638.50 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.445.030 [mindspore/train/serialization.py:172] The type of transformer.layers.26.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.701.809 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.826.937 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.904.706 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:13.944.969 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.344.63 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.699.51 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.158.195 [mindspore/train/serialization.py:172] The type of transformer.layers.27.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.264.919 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.485.471 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.705.548 [mindspore/train/serialization.py:172] The type of transformer.layers.27.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.867.368 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:14.993.167 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.834.85 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.130.802 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.220.610 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.259.697 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.352.288 [mindspore/train/serialization.py:172] The type of transformer.layers.28.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.455.665 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.691.411 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:15.925.196 [mindspore/train/serialization.py:172] The type of transformer.layers.28.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.939.77 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.224.727 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.315.400 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.347.918 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.401.128 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.450.668 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.543.591 [mindspore/train/serialization.py:172] The type of transformer.layers.29.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.646.280 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:16.846.158 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.331.09 [mindspore/train/serialization.py:172] The type of transformer.layers.29.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.184.317 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.300.414 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.389.959 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.429.820 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.520.308 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.567.747 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.657.636 [mindspore/train/serialization.py:172] The type of transformer.layers.30.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.728.735 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:17.928.203 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.114.830 [mindspore/train/serialization.py:172] The type of transformer.layers.30.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.277.994 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.404.092 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.580.551 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.607.950 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.704.398 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.740.571 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.829.036 [mindspore/train/serialization.py:172] The type of transformer.layers.31.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:18.905.601 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.272.387 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.625.457 [mindspore/train/serialization.py:172] The type of transformer.layers.31.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:19.923.697 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.301.02 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.885.52 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.123.411 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.212.266 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.241.343 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.332.399 [mindspore/train/serialization.py:172] The type of transformer.layers.32.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.407.361 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.715.249 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:20.914.293 [mindspore/train/serialization.py:172] The type of transformer.layers.32.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.674.88 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.188.920 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.268.550 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.300.973 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.385.320 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.419.537 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.506.842 [mindspore/train/serialization.py:172] The type of transformer.layers.33.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.585.341 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.769.195 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:21.967.155 [mindspore/train/serialization.py:172] The type of transformer.layers.33.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.122.703 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.233.893 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.319.927 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.354.878 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.458.756 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.485.193 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.572.608 [mindspore/train/serialization.py:172] The type of transformer.layers.34.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.640.210 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:22.840.375 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.256.54 [mindspore/train/serialization.py:172] The type of transformer.layers.34.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.172.301 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.307.490 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.395.371 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.430.829 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.515.615 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.548.733 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.635.346 [mindspore/train/serialization.py:172] The type of transformer.layers.35.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.702.520 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:23.901.627 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.903.65 [mindspore/train/serialization.py:172] The type of transformer.layers.35.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.246.005 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.360.536 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.419.082 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.455.698 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.540.870 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.570.835 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.665.868 [mindspore/train/serialization.py:172] The type of transformer.layers.36.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.733.669 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:24.922.977 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.111.864 [mindspore/train/serialization.py:172] The type of transformer.layers.36.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.263.407 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.414.406 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.501.113 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.533.688 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.616.836 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.640.137 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.795.118 [mindspore/train/serialization.py:172] The type of transformer.layers.37.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:25.926.389 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.303.492 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.664.528 [mindspore/train/serialization.py:172] The type of transformer.layers.37.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.809.632 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:26.927.201 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.211.40 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.514.75 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.141.832 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.163.909 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.226.285 [mindspore/train/serialization.py:172] The type of transformer.layers.38.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.387.562 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.636.010 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:27.901.214 [mindspore/train/serialization.py:172] The type of transformer.layers.38.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.500.45 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wo.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.167.980 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wq.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.253.882 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wq.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.285.260 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wk.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.373.861 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wk.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.402.797 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wv.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.504.672 [mindspore/train/serialization.py:172] The type of transformer.layers.39.attention.wv.bias:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.677.928 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w1.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:28.975.418 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w2.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:29.212.831 [mindspore/train/serialization.py:172] The type of transformer.layers.39.feed_forward.w3.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:30.718.306 [mindspore/train/serialization.py:172] The type of lm_head.weight:Float32 in 'parameter_dict' is different from the type of it in 'net':Float16, then the type convert from Float32 to Float16 in the network.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.310.992 [mindspore/train/serialization.py:1317] For 'load_param_into_net', 80 parameters in the 'net' are not loaded, because they are not in the 'parameter_dict', please check whether the network structure is consistent when training and loading checkpoint.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.369 [mindspore/train/serialization.py:1322] transformer.layers.0.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.462 [mindspore/train/serialization.py:1322] transformer.layers.0.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.530 [mindspore/train/serialization.py:1322] transformer.layers.1.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.616 [mindspore/train/serialization.py:1322] transformer.layers.1.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.673 [mindspore/train/serialization.py:1322] transformer.layers.2.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.727 [mindspore/train/serialization.py:1322] transformer.layers.2.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.782 [mindspore/train/serialization.py:1322] transformer.layers.3.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.834 [mindspore/train/serialization.py:1322] transformer.layers.3.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.886 [mindspore/train/serialization.py:1322] transformer.layers.4.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.939 [mindspore/train/serialization.py:1322] transformer.layers.4.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.311.993 [mindspore/train/serialization.py:1322] transformer.layers.5.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.044 [mindspore/train/serialization.py:1322] transformer.layers.5.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.114 [mindspore/train/serialization.py:1322] transformer.layers.6.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.167 [mindspore/train/serialization.py:1322] transformer.layers.6.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.218 [mindspore/train/serialization.py:1322] transformer.layers.7.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.269 [mindspore/train/serialization.py:1322] transformer.layers.7.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.320 [mindspore/train/serialization.py:1322] transformer.layers.8.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.370 [mindspore/train/serialization.py:1322] transformer.layers.8.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.418 [mindspore/train/serialization.py:1322] transformer.layers.9.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.467 [mindspore/train/serialization.py:1322] transformer.layers.9.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.515 [mindspore/train/serialization.py:1322] transformer.layers.10.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.564 [mindspore/train/serialization.py:1322] transformer.layers.10.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.612 [mindspore/train/serialization.py:1322] transformer.layers.11.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.660 [mindspore/train/serialization.py:1322] transformer.layers.11.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.709 [mindspore/train/serialization.py:1322] transformer.layers.12.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.768 [mindspore/train/serialization.py:1322] transformer.layers.12.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.821 [mindspore/train/serialization.py:1322] transformer.layers.13.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.871 [mindspore/train/serialization.py:1322] transformer.layers.13.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.920 [mindspore/train/serialization.py:1322] transformer.layers.14.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.312.969 [mindspore/train/serialization.py:1322] transformer.layers.14.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.017 [mindspore/train/serialization.py:1322] transformer.layers.15.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.065 [mindspore/train/serialization.py:1322] transformer.layers.15.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.113 [mindspore/train/serialization.py:1322] transformer.layers.16.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.161 [mindspore/train/serialization.py:1322] transformer.layers.16.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.209 [mindspore/train/serialization.py:1322] transformer.layers.17.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.256 [mindspore/train/serialization.py:1322] transformer.layers.17.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.304 [mindspore/train/serialization.py:1322] transformer.layers.18.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.351 [mindspore/train/serialization.py:1322] transformer.layers.18.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.398 [mindspore/train/serialization.py:1322] transformer.layers.19.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.445 [mindspore/train/serialization.py:1322] transformer.layers.19.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.494 [mindspore/train/serialization.py:1322] transformer.layers.20.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.542 [mindspore/train/serialization.py:1322] transformer.layers.20.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.591 [mindspore/train/serialization.py:1322] transformer.layers.21.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.639 [mindspore/train/serialization.py:1322] transformer.layers.21.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.687 [mindspore/train/serialization.py:1322] transformer.layers.22.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.734 [mindspore/train/serialization.py:1322] transformer.layers.22.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.782 [mindspore/train/serialization.py:1322] transformer.layers.23.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.831 [mindspore/train/serialization.py:1322] transformer.layers.23.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.887 [mindspore/train/serialization.py:1322] transformer.layers.24.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.937 [mindspore/train/serialization.py:1322] transformer.layers.24.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.313.986 [mindspore/train/serialization.py:1322] transformer.layers.25.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.034 [mindspore/train/serialization.py:1322] transformer.layers.25.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.083 [mindspore/train/serialization.py:1322] transformer.layers.26.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.131 [mindspore/train/serialization.py:1322] transformer.layers.26.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.179 [mindspore/train/serialization.py:1322] transformer.layers.27.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.226 [mindspore/train/serialization.py:1322] transformer.layers.27.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.273 [mindspore/train/serialization.py:1322] transformer.layers.28.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.321 [mindspore/train/serialization.py:1322] transformer.layers.28.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.368 [mindspore/train/serialization.py:1322] transformer.layers.29.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.416 [mindspore/train/serialization.py:1322] transformer.layers.29.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.463 [mindspore/train/serialization.py:1322] transformer.layers.30.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.511 [mindspore/train/serialization.py:1322] transformer.layers.30.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.558 [mindspore/train/serialization.py:1322] transformer.layers.31.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.605 [mindspore/train/serialization.py:1322] transformer.layers.31.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.653 [mindspore/train/serialization.py:1322] transformer.layers.32.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.699 [mindspore/train/serialization.py:1322] transformer.layers.32.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.746 [mindspore/train/serialization.py:1322] transformer.layers.33.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.793 [mindspore/train/serialization.py:1322] transformer.layers.33.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.839 [mindspore/train/serialization.py:1322] transformer.layers.34.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.886 [mindspore/train/serialization.py:1322] transformer.layers.34.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.940 [mindspore/train/serialization.py:1322] transformer.layers.35.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.314.992 [mindspore/train/serialization.py:1322] transformer.layers.35.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.043 [mindspore/train/serialization.py:1322] transformer.layers.36.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.093 [mindspore/train/serialization.py:1322] transformer.layers.36.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.141 [mindspore/train/serialization.py:1322] transformer.layers.37.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.190 [mindspore/train/serialization.py:1322] transformer.layers.37.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.238 [mindspore/train/serialization.py:1322] transformer.layers.38.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.285 [mindspore/train/serialization.py:1322] transformer.layers.38.attention.kvcache_mgr.value_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.333 [mindspore/train/serialization.py:1322] transformer.layers.39.attention.kvcache_mgr.key_past is not loaded.
[WARNING] ME(87237:281473685158192,MainProcess):2023-12-30-00:20:33.315.380 [mindspore/train/serialization.py:1322] transformer.layers.39.attention.kvcache_mgr.value_past is not loaded.
2023-12-30 00:20:33,315 - mindformers[mindformers/trainer/utils.py:607] - INFO - Network parameters are not loaded: (['transformer.layers.0.attention.kvcache_mgr.key_past', 'transformer.layers.0.attention.kvcache_mgr.value_past', 'transformer.layers.1.attention.kvcache_mgr.key_past', 'transformer.layers.1.attention.kvcache_mgr.value_past', 'transformer.layers.2.attention.kvcache_mgr.key_past', 'transformer.layers.2.attention.kvcache_mgr.value_past', 'transformer.layers.3.attention.kvcache_mgr.key_past', 'transformer.layers.3.attention.kvcache_mgr.value_past', 'transformer.layers.4.attention.kvcache_mgr.key_past', 'transformer.layers.4.attention.kvcache_mgr.value_past', 'transformer.layers.5.attention.kvcache_mgr.key_past', 'transformer.layers.5.attention.kvcache_mgr.value_past', 'transformer.layers.6.attention.kvcache_mgr.key_past', 'transformer.layers.6.attention.kvcache_mgr.value_past', 'transformer.layers.7.attention.kvcache_mgr.key_past', 'transformer.layers.7.attention.kvcache_mgr.value_past', 'transformer.layers.8.attention.kvcache_mgr.key_past', 'transformer.layers.8.attention.kvcache_mgr.value_past', 'transformer.layers.9.attention.kvcache_mgr.key_past', 'transformer.layers.9.attention.kvcache_mgr.value_past', 'transformer.layers.10.attention.kvcache_mgr.key_past', 'transformer.layers.10.attention.kvcache_mgr.value_past', 'transformer.layers.11.attention.kvcache_mgr.key_past', 'transformer.layers.11.attention.kvcache_mgr.value_past', 'transformer.layers.12.attention.kvcache_mgr.key_past', 'transformer.layers.12.attention.kvcache_mgr.value_past', 'transformer.layers.13.attention.kvcache_mgr.key_past', 'transformer.layers.13.attention.kvcache_mgr.value_past', 'transformer.layers.14.attention.kvcache_mgr.key_past', 'transformer.layers.14.attention.kvcache_mgr.value_past', 'transformer.layers.15.attention.kvcache_mgr.key_past', 'transformer.layers.15.attention.kvcache_mgr.value_past', 'transformer.layers.16.attention.kvcache_mgr.key_past', 'transformer.layers.16.attention.kvcache_mgr.value_past', 'transformer.layers.17.attention.kvcache_mgr.key_past', 'transformer.layers.17.attention.kvcache_mgr.value_past', 'transformer.layers.18.attention.kvcache_mgr.key_past', 'transformer.layers.18.attention.kvcache_mgr.value_past', 'transformer.layers.19.attention.kvcache_mgr.key_past', 'transformer.layers.19.attention.kvcache_mgr.value_past', 'transformer.layers.20.attention.kvcache_mgr.key_past', 'transformer.layers.20.attention.kvcache_mgr.value_past', 'transformer.layers.21.attention.kvcache_mgr.key_past', 'transformer.layers.21.attention.kvcache_mgr.value_past', 'transformer.layers.22.attention.kvcache_mgr.key_past', 'transformer.layers.22.attention.kvcache_mgr.value_past', 'transformer.layers.23.attention.kvcache_mgr.key_past', 'transformer.layers.23.attention.kvcache_mgr.value_past', 'transformer.layers.24.attention.kvcache_mgr.key_past', 'transformer.layers.24.attention.kvcache_mgr.value_past', 'transformer.layers.25.attention.kvcache_mgr.key_past', 'transformer.layers.25.attention.kvcache_mgr.value_past', 'transformer.layers.26.attention.kvcache_mgr.key_past', 'transformer.layers.26.attention.kvcache_mgr.value_past', 'transformer.layers.27.attention.kvcache_mgr.key_past', 'transformer.layers.27.attention.kvcache_mgr.value_past', 'transformer.layers.28.attention.kvcache_mgr.key_past', 'transformer.layers.28.attention.kvcache_mgr.value_past', 'transformer.layers.29.attention.kvcache_mgr.key_past', 'transformer.layers.29.attention.kvcache_mgr.value_past', 'transformer.layers.30.attention.kvcache_mgr.key_past', 'transformer.layers.30.attention.kvcache_mgr.value_past', 'transformer.layers.31.attention.kvcache_mgr.key_past', 'transformer.layers.31.attention.kvcache_mgr.value_past', 'transformer.layers.32.attention.kvcache_mgr.key_past', 'transformer.layers.32.attention.kvcache_mgr.value_past', 'transformer.layers.33.attention.kvcache_mgr.key_past', 'transformer.layers.33.attention.kvcache_mgr.value_past', 'transformer.layers.34.attention.kvcache_mgr.key_past', 'transformer.layers.34.attention.kvcache_mgr.value_past', 'transformer.layers.35.attention.kvcache_mgr.key_past', 'transformer.layers.35.attention.kvcache_mgr.value_past', 'transformer.layers.36.attention.kvcache_mgr.key_past', 'transformer.layers.36.attention.kvcache_mgr.value_past', 'transformer.layers.37.attention.kvcache_mgr.key_past', 'transformer.layers.37.attention.kvcache_mgr.value_past', 'transformer.layers.38.attention.kvcache_mgr.key_past', 'transformer.layers.38.attention.kvcache_mgr.value_past', 'transformer.layers.39.attention.kvcache_mgr.key_past', 'transformer.layers.39.attention.kvcache_mgr.value_past'], [])
{'auto_trans_ckpt': False,
'context': {'ascend_config': {'precision_mode': 'must_keep_origin_dtype'},
'device_id': 0,
'device_target': 'Ascend',
'enable_graph_kernel': False,
'graph_kernel_flags': '--disable_expand_ops=Softmax,Dropout '
'--enable_parallel_fusion=true '
'--reduce_fuse_depth=8 '
'--enable_auto_tensor_inplace=true',
'max_call_depth': 10000,
'save_graphs': False,
'save_graphs_path': './graph'},
'device_num': 4,
'infer': {'increment_model_path': '/path/qwen_7b_inc.mindir',
'infer_seq_length': 1024,
'prefill_model_path': '/path/qwen_7b_prefill.mindir'},
'load_checkpoint': '/data/modelscope/Qwen-14B-Chat-ms-parallel4',
'local_rank': 0,
'micro_batch_interleave_num': 1,
'model': {'arch': {'type': 'QwenForCausalLM'},
'model_config': {'batch_size': 1,
'checkpoint_name_or_path': None,
'compute_dtype': 'float16',
'do_sample': False,
'emb_dropout_prob': 0.0,
'eos_token_id': 151643,
'hidden_size': 5120,
'intermediate_size': 13696,
'kv_channels': 128,
'layernorm_compute_type': 'float32',
'max_decode_length': 512,
'num_attention_heads': 40,
'num_hidden_layers': 40,
'offset': 0,
'pad_token_id': 151643,
'param_init_type': 'float16',
'repetition_penalty': 1,
'rms_norm_eps': 1e-06,
'rotary_dtype': 'float16',
'rotary_emb_base': 10000,
'rotary_pct': 1.0,
'seq_length': 8192,
'softmax_compute_type': 'float16',
'top_k': 0,
'top_p': 0.8,
'type': 'QwenConfig',
'use_past': True,
'use_past_shard': False,
'vocab_size': 152064}},
'moe_config': <mindformers.modules.transformer.moe.MoEConfig object at 0xffff32a35250>,
'only_save_strategy': False,
'output_dir': './output',
'parallel': {'device_num': 4,
'enable_alltoall': False,
'enable_parallel_optimizer': True,
'full_batch': True,
'gradients_mean': False,
'parallel_mode': 'semi_auto_parallel',
'parallel_optimizer_config': {'gradient_accumulation_shard': False,
'parallel_optimizer_threshold': 64},
'search_mode': 'sharding_propagation',
'strategy_ckpt_save_file': './output/strategy/./ckpt_strategy_rank_0.ckpt'},
'parallel_config': <mindformers.modules.transformer.transformer.TransformerOpParallelConfig object at 0xfffea4364250>,
'processor': {'return_tensors': 'ms',
'tokenizer': {'model_max_length': 8192,
'pad_token': '<|endoftext|>',
'type': 'QwenTokenizer',
'vocab_file': '/data/modelscope/Qwen-14B-Chat/qwen.tiktoken'},
'type': 'QwenProcessor'},
'rank_id': 0,
'recompute_config': <mindformers.modules.transformer.transformer.TransformerRecomputeConfig object at 0xfffea465d8e0>,
'resume_training': False,
'run_mode': 'predict',
'runner_config': {'batch_size': 1,
'epochs': 1,
'gradient_accumulation_steps': 1,
'sink_mode': True,
'sink_size': 2},
'seed': 0,
'src_strategy_path_or_dir': '',
'trainer': {'model_name': 'qwen_14b', 'type': 'CausalLanguageModelingTrainer'},
'use_parallel': True}
2023-12-30 00:20:33,534 - mindformers[mindformers/generation/text_generator.py:1097] - INFO - Generation Config is: {'max_length': 2048, 'max_new_tokens': None, 'num_beams': 1, 'do_sample': False, 'use_past': True, 'temperature': 1.0, 'top_k': 0, 'top_p': 1.0, 'repetition_penalty': 1, 'encoder_repetition_penalty': 1.0, 'renormalize_logits': False, 'pad_token_id': 151643, 'bos_token_id': 1, 'eos_token_id': 151643, '_from_model_config': True}
2023-12-30 00:20:33,536 - mindformers[mindformers/generation/text_generator.py:176] - INFO - The generation mode will be **GREEDY_SEARCH**.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.082 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 1 is not a tensor.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.167 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 3 is not a tensor.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.211 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 4 is not a tensor.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:20:33.563.239 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 5 is not a tensor.
[WARNING] PRE_ACT(87237,ffffb3047930,python):2023-12-30-00:22:04.336.849 [mindspore/ccsrc/backend/common/pass/adjust_depend_for_parallel_optimizer_recompute_all_gather.cc:84] IncreaseAllgatherFusionId] Increase the duplicated allgather fusion id
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.827.953 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 1 is not a tensor.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.091 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 3 is not a tensor.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.136 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 4 is not a tensor.
[WARNING] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:23:19.828.166 [mindspore/ccsrc/frontend/parallel/step_parallel_utils.cc:1302] ExtendInputArgsAbstractShape] The input 5 is not a tensor.
[ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.723 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:143] CheckStrategyByVector] GatherInfo19941994: The strategy is ((1, 1), (1)), strategy len: 1 is not equal to inputs len: 2, index: 1
[ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.833 [mindspore/ccsrc/frontend/parallel/ops_info/operator_info.cc:962] InitForCostModelWithAutoRepeatCalc] GatherInfo19941994: CheckStrategy failed.
[ERROR] PARALLEL(87237,ffffb3047930,python):2023-12-30-00:24:09.687.865 [mindspore/ccsrc/frontend/parallel/ops_info/gather_info.cc:1255] Init] GatherInfo19941994: Init failed.
Traceback (most recent call last):
File "/data/test1229/mindformers/research/qwen/run_qwen.py", line 165, in <module>
main(task=args.task,
File "/data/test1229/mindformers/research/qwen/run_qwen.py", line 113, in main
result = task.predict(input_data=prompt,
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/_checkparam.py", line 1313, in wrapper
return func(*args, **kwargs)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/trainer.py", line 659, in predict
output_result = self.trainer.predict(
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/causal_language_modeling/causal_language_modeling.py", line 315, in predict
return self.predict_process(config=config,
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/trainer/base_trainer.py", line 878, in predict_process
output_results = self.pipeline_task(input_data, top_k=top_k)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/base_pipeline.py", line 123, in __call__
outputs = self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/base_pipeline.py", line 170, in run_single
model_outputs = self.forward(model_inputs, **forward_params)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/pipeline/text_generation_pipeline.py", line 180, in forward
output_ids = self.network.generate(input_ids, **forward_params)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 1114, in generate
output_ids = self._greedy_search(
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 394, in _greedy_search
res = self._incremental_infer(
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/generation/text_generator.py", line 238, in _incremental_infer
res = self(
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 680, in __call__
out = self.compile_and_run(*args, **kwargs)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 1020, in compile_and_run
self.compile(*args, **kwargs)
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/nn/cell.py", line 997, in compile
_cell_graph_executor.compile(self, phase=self.phase,
File "/root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindspore/common/api.py", line 1547, in compile
result = self._graph_executor.compile(obj, args, kwargs, phase, self._use_vm_mode())
RuntimeError: Failure:operator Gather init failed
----------------------------------------------------
- The Function Call Stack: (For framework developers)
----------------------------------------------------
In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_layer.py:171/ freqs_cos = self.reshape(self.gather(self.freqs_cos, batch_valid_length, 0), (batch_size, 1, 1, self.head_dim))/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:294/ freqs_cis = self.freqs_mgr.increment(batch_valid_length, bs)/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:290/ if self.is_first_iteration:/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:284/ if not self.use_past:/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:125/ output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:123/ tokens = input_ids/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:120/ if self.training:/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:116/ if not isinstance(init_reset, Tensor):/
In file /data/test1229/mindformers/research/qwen/qwen_model.py:111/ def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None,/
----------------------------------------------------
- C++ Call Stack: (For framework developers)
----------------------------------------------------
mindspore/ccsrc/frontend/parallel/step_parallel.cc:1655 ExtractStrategyAndInit
----------------------------------------------------
- The Traceback of Net Construct Code:
----------------------------------------------------
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:111
def construct(self, input_ids, labels=None, input_position=None, position_ids=None, attention_mask=None,
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:116
if not isinstance(init_reset, Tensor):
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:120
if self.training:
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:123
tokens = input_ids
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:284
if not self.use_past:
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:290
if self.is_first_iteration:
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:295
if self.is_dynamic and self.is_flexible_shape and not self.use_kvcache_op:
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:300
mask = self.casual_mask.increment(self.kvcache_preprocess.range, batch_valid_length)
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:294
freqs_cis = self.freqs_mgr.increment(batch_valid_length, bs)
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:290
if self.is_first_iteration:
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:125
output = self.transformer(tokens, init_reset=init_reset, batch_valid_length=batch_valid_length)
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:306
for i in range(self.num_hidden_layers):
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:307
hidden_states = self.layers[i](hidden_states, freqs_cis, mask, kvcache_inputs=kvcache_inputs)
^
# In file /data/test1229/mindformers/research/qwen/qwen_model.py:307
hidden_states = self.layers[i](hidden_states, freqs_cis, mask, kvcache_inputs=kvcache_inputs)
^
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_transformer.py:489
h = self.attention(input_x, freqs_cis, mask, kvcache_inputs)
^
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/models/llama/llama_transformer.py:245
query = self.cast(self.wq(x), self.dtype) # dp, 1 -> dp, mp
^
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:487
if self.expert_flag:
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:487
if self.expert_flag:
# In file /root/anaconda3/envs/ms_dev/lib/python3.9/site-packages/mindformers/modules/layers.py:495
x = self.matmul(x, weight)
^
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment