diff --git a/examples/mcore/qwen3/data_convert_qwen3_instruction.sh b/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
index 1b56a02311f7b2f2713e92cbe8cfa75a7409fa93..6840e1fbec897c18b15aab5a70810c7395ccbd74 100644
--- a/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
+++ b/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
index 3ac72f800ade1ecf53e24e7851ffdcf21c5a3c96..b71565b4551c500926550bde96881c83eea96fc4 100644
--- a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
+++ b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_moe_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_moe_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/mindspeed_llm/training/arguments.py b/mindspeed_llm/training/arguments.py
index 82481102a9767523ba62e30f24ecd17505ce8fc6..ebb988ca756963f6f21a5d4e2a15c7f2ac3d5cbd 100644
--- a/mindspeed_llm/training/arguments.py
+++ b/mindspeed_llm/training/arguments.py
@@ -1030,6 +1030,9 @@ def _validate_recompute_args(args):
 
 def _validate_instruction_finetune(args):
     if args.variable_seq_lengths:
+        if args.log_throughput:
+            args.log_throughput = False
+            print_rank0_by_args(args, f"In variable-seq-lengths mode, accurate TFLOPS cannot be calculated, set --log-throughput to False.")
         if args.context_parallel_size > 1 and args.pad_to_multiple_of % (args.tensor_model_parallel_size * args.context_parallel_size) != 0:
             raise AssertionError('pad_to_multiple_of must be divided by (tp * cp) when use cp.')
         if args.num_experts is not None and args.moe_token_dispatcher_type == "allgather":
diff --git a/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh b/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
index 5dc6efe55cac6dea8d48852958f54bcc8e9484a3..27020458afa5c209984bca221e5aae2ff0064f4e 100644
--- a/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
+++ b/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
@@ -8,9 +8,9 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 4 \
     --target-pipeline-parallel-size 2 \
-    --load-dir ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-1.7B-Base/ \
     --save-dir ./model_weight/Qwen3-1.7B-mcore \
-    --tokenizer-model ./mdoel_from_hf/Qwen3-1.7B-Base/tokenizer.json \
+    --tokenizer-model ./model_from_hf/Qwen3-1.7B-Base/tokenizer.json \
     --model-type-hf qwen3 \
     --params-dtype bf16 \
     --spec mindspeed_llm.tasks.models.spec.qwen3_spec layer_spec
\ No newline at end of file
diff --git a/tests/0day/qwen3/data_convert_qwen3_pretrain.sh b/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
index 55ac7aa55ff01f7f3d9be11ab7c6f7283c5967ad..e42fc75926ad6fb57641e764d291d089cbae0079 100644
--- a/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
+++ b/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh b/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
index bc0490af1359edcf8c719f7e284cd6ed4e2b4ee5..6820c606406da726bfdac86d62bfdbe561b45c16 100644
--- a/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
@@ -5,7 +5,7 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --load-dir ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-0.6B-Base/ \
     --save-dir ./model_weights/Qwen3-0.6B-mcore \
     --tokenizer-model ./model_from_hf/Qwen3-0.6B-Base/tokenizer.json \
     --model-type-hf qwen3 \
diff --git a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
index e4ecd4e4c14f9d549dcbb98869d9b17173dd6a9d..48c251f5f52b0c318616968afcacecc68c335646 100644
--- a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-0.6B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
index dfa1028ab943bc60b13f5307737116f60417c644..0ac2162f99d580bd592fd5192bb35cd6c4d1086f 100644
--- a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-0.6B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh b/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
index 7fb4565b8d5900b537e2803743f561112caaa3e3..fc7c39258d154e9a979e94e03c18a6760b000068 100644
--- a/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
@@ -5,7 +5,7 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --load-dir ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-1.7B-Base/ \
     --save-dir ./model_weights/Qwen3-1.7B-mcore \
     --tokenizer-model ./model_from_hf/Qwen3-1.7B-Base/tokenizer.json \
     --model-type-hf qwen3 \
diff --git a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
index 4bd69c7faea173c77e787b5183a679cb89d71922..e7ac95e11143f9a634d1b20f61f684c1ff2019bd 100644
--- a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
index fd0577451c7a8810ad8836d8b4d4b3c45840f9e1..f7ad51b47271aa91cd4b269bab2e798da3f420cb 100644
--- a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
index b388060df77e4e96391657e58880d60f4ba8c8d5..4e5276783c2346f0add4b1e9525a6d3b4341ec0d 100644
--- a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-14B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-14B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
index 5a82bc2638c3aaf5d0ccec1deeb614a7357996ea..ef146a177246fe3d5b92437c31c2a48a9bea7114 100644
--- a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-14B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-14B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh b/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
index 5191f756122eb3b13519d3c2e2c70726d6553bfa..0c42e7f2af71393988db6df2d2ea4c82e38f3867 100644
--- a/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_a3b_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_a3b_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
index f72cf82f4b63b03ce7132b7c63061ac2332f18a9..fdc8c68705081ba42a57787f3994a889e144c1c1 100644
--- a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-4B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-4B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
index 7e03701a27e018980f8795fb5cce5b18e8814766..17f5bf12841d99463f7529d6e6d4227a92f07fae 100644
--- a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-4B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-4B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
index e3a528b303f96636e8cdcf518241c0fa5be2fe1c..a06e71ee6d4c0be16abc567be3310ffe3fa8d473 100644
--- a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-8B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-8B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
index bda76b554a02d09b1974fadfe6e80a951c4eceff..e4f2d200f76d02fb2f572cfbbd473ee18261e7fb 100644
--- a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-8B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-8B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \