From 3943740890458343fb5775fa6ab59ac4e14bf677 Mon Sep 17 00:00:00 2001
From: sunjunjie1587
Date: Fri, 26 Sep 2025 09:36:51 +0800
Subject: [PATCH] in variable_seq_lengths mode, set --log-throughput to false

---
 docs/pytorch/solutions/finetune/instruction_finetune.md | 2 +-
 docs/pytorch/solutions/pretrain/pretrain.md | 2 +-
 docs/pytorch/solutions/pretrain/pretrain_eod.md | 2 +-
 examples/mcore/qwen3/data_convert_qwen3_instruction.sh | 2 +-
 .../mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh | 2 +-
 .../qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh | 2 +-
 .../mcore/qwen3_next/data_convert_qwen3_next_instruction.sh | 2 +-
 examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh | 2 +-
 examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh | 2 +-
 examples/mindspore/qwen3/data_convert_qwen3_instruction.sh | 2 +-
 mindspeed_llm/features_manager/finetune/finetune.py | 4 ++++
 tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh | 4 ++--
 tests/0day/qwen3/data_convert_qwen3_pretrain.sh | 2 +-
 .../qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh | 2 +-
 .../qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh | 2 +-
 .../qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh | 2 +-
 .../qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh | 2 +-
 .../qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh | 2 +-
 .../qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh | 2 +-
 .../qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh | 2 +-
 tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh | 2 +-
 .../qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh | 2 +-
 .../0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh | 2 +-
 tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh | 2 +-
 .../0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh | 2 +-
 tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh | 2 +-
 26 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/docs/pytorch/solutions/finetune/instruction_finetune.md b/docs/pytorch/solutions/finetune/instruction_finetune.md
index 3f68217b0..ac78ca359 100644
--- a/docs/pytorch/solutions/finetune/instruction_finetune.md
+++ b/docs/pytorch/solutions/finetune/instruction_finetune.md
@@ -137,7 +137,7 @@ bash examples/mcore/qwen3/ckpt_convert_qwen3_hf2mcore.sh
 source /usr/local/Ascend/ascend-toolkit/set_env.sh # 修改为真实的ascend-toolkit路径
 ......
 --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet # 原始数据集路径
---tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf # HF的tokenizer路径
+--tokenizer-name-or-path ./model_from_hf/qwen3_hf # HF的tokenizer路径
 --output-prefix ./finetune_dataset/alpaca # 保存路径
 ......
 ```
diff --git a/docs/pytorch/solutions/pretrain/pretrain.md b/docs/pytorch/solutions/pretrain/pretrain.md
index 80de3f40e..4e5039a8c 100644
--- a/docs/pytorch/solutions/pretrain/pretrain.md
+++ b/docs/pytorch/solutions/pretrain/pretrain.md
@@ -40,7 +40,7 @@ source /usr/local/Ascend/nnal/atb/set_env.sh # 以具体的nnal路径为主
 source /usr/local/Ascend/ascend-toolkit/set_env.sh # 修改为真实的ascend-toolkit路径
 ......
 --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet # 原始数据集路径
---tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf # HF的tokenizer路径
+--tokenizer-name-or-path ./model_from_hf/qwen3_hf # HF的tokenizer路径
 --output-prefix ./finetune_dataset/alpaca # 保存路径
 ......
 ```
diff --git a/docs/pytorch/solutions/pretrain/pretrain_eod.md b/docs/pytorch/solutions/pretrain/pretrain_eod.md
index 1521696d2..4ddfeef62 100644
--- a/docs/pytorch/solutions/pretrain/pretrain_eod.md
+++ b/docs/pytorch/solutions/pretrain/pretrain_eod.md
@@ -30,7 +30,7 @@ source /usr/local/Ascend/nnal/atb/set_env.sh # 以具体的nnal路径为主
 source /usr/local/Ascend/ascend-toolkit/set_env.sh # 修改为真实的ascend-toolkit路径
 ......
 --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet # 原始数据集路径
---tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf # HF的tokenizer路径
+--tokenizer-name-or-path ./model_from_hf/qwen3_hf # HF的tokenizer路径
 --output-prefix ./finetune_dataset/alpaca # 保存路径
 --append-eod # 添加此参数开启pack模式数据预处理
 ......
diff --git a/examples/mcore/qwen3/data_convert_qwen3_instruction.sh b/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
index 2dda0766e..e46fc8bc6 100644
--- a/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
+++ b/examples/mcore/qwen3/data_convert_qwen3_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
index 14d39b128..5958407b1 100644
--- a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
+++ b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_moe_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_moe_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh
index 7928ab093..00ed92cb3 100644
--- a/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh
+++ b/examples/mcore/qwen3_moe/data_convert_qwen3_moe_instruction_pack.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_moe_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_moe_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh b/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh
index 843ae07d1..dda4f1b82 100644
--- a/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh
+++ b/examples/mcore/qwen3_next/data_convert_qwen3_next_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_next_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_next_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh b/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh
index c148c41fc..309cc988d 100644
--- a/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh
+++ b/examples/mcore/seed_oss/data_convert_seed_oss_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./dataset/seed_oss_finetune_dataset_cpm
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Seed-OSS-36B-Base_hf \
+    --tokenizer-name-or-path ./model_from_hf/Seed-OSS-36B-Base_hf \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh b/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh
index 7282f501e..9f54f46de 100644
--- a/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh
+++ b/examples/mcore/seed_oss/data_convert_seed_oss_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Seed-OSS-36B-Base_hf \
+    --tokenizer-name-or-path ./model_from_hf/Seed-OSS-36B-Base_hf \
     --output-prefix ./finetune_dataset/alpaca \
     --tokenizer-type PretrainedFromHF \
     --workers 4 \
diff --git a/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh b/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh
index 2dda0766e..e46fc8bc6 100644
--- a/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh
+++ b/examples/mindspore/qwen3/data_convert_qwen3_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/mindspeed_llm/features_manager/finetune/finetune.py b/mindspeed_llm/features_manager/finetune/finetune.py
index a04e05ed6..04923a492 100644
--- a/mindspeed_llm/features_manager/finetune/finetune.py
+++ b/mindspeed_llm/features_manager/finetune/finetune.py
@@ -1,3 +1,4 @@
+import warnings
 from argparse import ArgumentParser
 
 from mindspeed.features_manager.feature import MindSpeedFeature
@@ -27,6 +28,9 @@ class FinetuneFeature(MindSpeedFeature):
     def pre_validate_args(self, args):
         self.origin_variable_seq_lengths = None
         if args.variable_seq_lengths:
+            if args.log_throughput:
+                args.log_throughput = False
+                warnings.warn("In variable-seq-lengths mode, accurate TFLOPS cannot be calculated, set --log-throughput to False.", RuntimeWarning)
             self.origin_variable_seq_lengths = args.variable_seq_lengths
             args.variable_seq_lengths = False
 
diff --git a/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh b/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
index 5dc6efe55..27020458a 100644
--- a/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
+++ b/tests/0day/qwen3/ckpt_convert_qwen3_hf2mcore.sh
@@ -8,9 +8,9 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 4 \
     --target-pipeline-parallel-size 2 \
-    --load-dir ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-1.7B-Base/ \
     --save-dir ./model_weight/Qwen3-1.7B-mcore \
-    --tokenizer-model ./mdoel_from_hf/Qwen3-1.7B-Base/tokenizer.json \
+    --tokenizer-model ./model_from_hf/Qwen3-1.7B-Base/tokenizer.json \
     --model-type-hf qwen3 \
     --params-dtype bf16 \
     --spec mindspeed_llm.tasks.models.spec.qwen3_spec layer_spec
\ No newline at end of file
diff --git a/tests/0day/qwen3/data_convert_qwen3_pretrain.sh b/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
index 55ac7aa55..e42fc7592 100644
--- a/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
+++ b/tests/0day/qwen3/data_convert_qwen3_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh b/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
index bc0490af1..6820c6064 100644
--- a/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/ckpt_convert_qwen3_0point6b_hf2mcore.sh
@@ -5,7 +5,7 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --load-dir ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-0.6B-Base/ \
     --save-dir ./model_weights/Qwen3-0.6B-mcore \
     --tokenizer-model ./model_from_hf/Qwen3-0.6B-Base/tokenizer.json \
     --model-type-hf qwen3 \
diff --git a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
index e4ecd4e4c..48c251f5f 100644
--- a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-0.6B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
index dfa1028ab..0ac2162f9 100644
--- a/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-0.6b/data_convert_qwen3_0point6b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-0.6B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-0.6B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh b/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
index 7fb4565b8..fc7c39258 100644
--- a/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/ckpt_convert_qwen3_1point7b_hf2mcore.sh
@@ -5,7 +5,7 @@ python convert_ckpt.py \
     --save-model-type mg \
     --target-tensor-parallel-size 1 \
     --target-pipeline-parallel-size 1 \
-    --load-dir ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --load-dir ./model_from_hf/Qwen3-1.7B-Base/ \
     --save-dir ./model_weights/Qwen3-1.7B-mcore \
     --tokenizer-model ./model_from_hf/Qwen3-1.7B-Base/tokenizer.json \
     --model-type-hf qwen3 \
diff --git a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
index 4bd69c7fa..e7ac95e11 100644
--- a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
index fd0577451..f7ad51b47 100644
--- a/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-1.7b/data_convert_qwen3_1point7b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-1.7B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-1.7B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
index b388060df..4e5276783 100644
--- a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-14B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-14B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
index 5a82bc263..ef146a177 100644
--- a/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-14b/data_convert_qwen3_14b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-14B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-14B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh b/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
index 5191f7561..0c42e7f2a 100644
--- a/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-30b-a3b/data_convert_qwen3_a3b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/qwen3_a3b_hf/ \
+    --tokenizer-name-or-path ./model_from_hf/qwen3_a3b_hf/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
index f72cf82f4..fdc8c6870 100644
--- a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-4B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-4B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
index 7e03701a2..17f5bf128 100644
--- a/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-4b/data_convert_qwen3_4b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-4B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-4B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
diff --git a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
index e3a528b30..a06e71ee6 100644
--- a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
+++ b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_instruction.sh
@@ -4,7 +4,7 @@ mkdir ./finetune_dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-8B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-8B-Base/ \
     --output-prefix ./finetune_dataset/alpaca \
     --handler-name AlpacaStyleInstructionHandler \
     --tokenizer-type PretrainedFromHF \
diff --git a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
index bda76b554..e4f2d200f 100644
--- a/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
+++ b/tests/0day/qwen3/qwen3-8b/data_convert_qwen3_8b_pretrain.sh
@@ -4,7 +4,7 @@ mkdir ./dataset
 
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
-    --tokenizer-name-or-path ./mdoel_from_hf/Qwen3-8B-Base/ \
+    --tokenizer-name-or-path ./model_from_hf/Qwen3-8B-Base/ \
     --tokenizer-type PretrainedFromHF \
     --handler-name GeneralPretrainHandler \
     --output-prefix ./dataset/enwiki \
--
Gitee