From 8eee3148761ecbf85e4fdec6837ca3ed1453d798 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=AE=A6=E6=99=93=E7=8E=B2?= <3174348550@qq.com>
Date: Tue, 30 Sep 2025 15:32:14 +0800
Subject: [PATCH] modify contents

---
 .../developer_guide/operations/custom_ops.md      |  2 +-
 docs/vllm_mindspore/docs/source_en/faqs/faqs.md   |  6 +++---
 .../docs/source_en/general/security.md            | 12 ++++++------
 .../getting_started/installation/installation.md  |  2 +-
 .../getting_started/quick_start/quick_start.md    |  8 +++-----
 .../deepseek_r1_671b_w8a8_dp4_tp4_ep4.md          |  8 +++-----
 .../getting_started/quick_start/quick_start.md    |  2 +-
 .../qwen2.5_7b_singleNPU/qwen2.5_7b_singleNPU.md  |  2 +-
 .../ms_infer/ms_infer_model_serving_infer.md      |  4 ++--
 .../ms_infer/ms_infer_model_serving_infer.md      |  4 ++--
 10 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md b/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md
index e16285a9fe..b0efe690ea 100644
--- a/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md
+++ b/docs/vllm_mindspore/docs/source_en/developer_guide/operations/custom_ops.md
@@ -35,7 +35,7 @@ vllm-mindspore/
 
 ## Integration Process
 
-To integrate a custom operator, user need to create [Operator Interface Declaration](#operator-interface-declaration), [Operator Implementation](#operator-implementation) and [Operator Integration](#operator-integration) in the directory `ops/ascendc/`. After the initial development and integration of the custom operator, user can add [Operator Interface](#add-operator-call-interface) and do [Operator Compilation and Testing](#operator-compilation-and-testing) after declaration and implementation.
+To integrate a custom operator, users need to create [Operator Interface Declaration](#operator-interface-declaration), [Operator Implementation](#operator-implementation) and [Operator Integration](#operator-integration) in the directory `ops/ascendc/`. After the initial development and integration of the custom operator, users can add the [Operator Interface](#add-operator-call-interface) and perform [Operator Compilation and Testing](#operator-compilation-and-testing) after declaration and implementation.
 
 ### Operator Interface Declaration
 
diff --git a/docs/vllm_mindspore/docs/source_en/faqs/faqs.md b/docs/vllm_mindspore/docs/source_en/faqs/faqs.md
index c781201408..8e993fbaa3 100644
--- a/docs/vllm_mindspore/docs/source_en/faqs/faqs.md
+++ b/docs/vllm_mindspore/docs/source_en/faqs/faqs.md
@@ -37,14 +37,14 @@
 
 - Solution:
    1. Check if the model path exists and is valid;
-   2. If the model path exists and the model files are in `safetensors` format, confirm whether the yaml file contains the `load_ckpt_format: "safetensors"` field:
-      1. Print the path of the yaml file used by the model:
+   2. If the model path exists and the model files are in `safetensors` format, confirm whether the YAML file contains the `load_ckpt_format: "safetensors"` field:
+      1. Print the path of the YAML file used by the model:
 
          ```bash
         echo $MINDFORMERS_MODEL_CONFIG
         ```
 
-      2. Check the yaml file. If the `load_ckpt_format` field is missing, add it:
+      2. Check the YAML file. If the `load_ckpt_format` field is missing, add it:
 
         ```text
         load_ckpt_format: "safetensors"
diff --git a/docs/vllm_mindspore/docs/source_en/general/security.md b/docs/vllm_mindspore/docs/source_en/general/security.md
index d156fffcb7..b8744e4909 100644
--- a/docs/vllm_mindspore/docs/source_en/general/security.md
+++ b/docs/vllm_mindspore/docs/source_en/general/security.md
@@ -2,7 +2,7 @@
 
 [![View Source On Gitee](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/website-images/master/resource/_static/logo_source_en.svg)](https://gitee.com/mindspore/docs/blob/master/docs/vllm_mindspore/docs/source_en/general/security.md)
 
-When enabling inference services using vLLM-MindSpore Plugin on Ascend, there may be some security-related issues due to the need for certain network ports for necessary functions such as serviceification, node communication, and model execution.
+When enabling inference services using vLLM-MindSpore Plugin on Ascend, there may be some security-related issues because certain network ports are required for necessary functions such as service deployment, node communication, and model execution.
 
 ## Service Port Configuration
 
@@ -28,15 +28,15 @@
 For security, it should be deployed in a sufficiently secure isolated network environment.
 
 1. Environment Variables:
 
     * `VLLM_HOST_IP`: Sets the IP address for vLLM processes to communicate on, main scenario is to communicate in MindSpore distributed network.
-    * `VLLM_DP_MASTER_IP`: Sets the IP address for data parallel(not for online-serving, default: `127.0.0.1`).
-    * `VLLM_DP_MASTER_PORT`: Sets the port for data parallel(not for online-serving, default: `0`).
+    * `VLLM_DP_MASTER_IP`: Sets the IP address for data parallel (not for online-serving, default: `127.0.0.1`).
+    * `VLLM_DP_MASTER_PORT`: Sets the port for data parallel (not for online-serving, default: `0`).
 
 2. Data Parallel Configuration:
 
-    * `data_parallel_master_ip`: Sets the IP address for data parallel(default: `127.0.0.1`).
-    * `data_parallel_master_port`: Sets the port for data parallel(default: `29500`).
+    * `data_parallel_master_ip`: Sets the IP address for data parallel (default: `127.0.0.1`).
+    * `data_parallel_master_port`: Sets the port for data parallel (default: `29500`).
 
 ### Executing Framework Distributed Communication
 
-It should be noted that vLLM-MindSpore Plugin use MindSpore's distributed communication. For detailed security information about MindSpore, please refer to the [MindSpore](https://www.mindspore.cn/en).
+It should be noted that vLLM-MindSpore Plugin uses MindSpore's distributed communication. For detailed security information about MindSpore, please refer to [MindSpore](https://www.mindspore.cn/en).
 
 ## Security Recommendations
 
diff --git a/docs/vllm_mindspore/docs/source_en/getting_started/installation/installation.md b/docs/vllm_mindspore/docs/source_en/getting_started/installation/installation.md
index a3b73899ab..c3e3759de0 100644
--- a/docs/vllm_mindspore/docs/source_en/getting_started/installation/installation.md
+++ b/docs/vllm_mindspore/docs/source_en/getting_started/installation/installation.md
@@ -149,7 +149,7 @@ vLLM-MindSpore Plugin can be installed in the following two ways. **vLLM-MindSpo
 
 - **vLLM-MindSpore Plugin Manual Installation**
 
-  If user need to modify the components or use other versions, components need to be manually installed in a specific order. Version compatibility of vLLM-MindSpore Plugin can be found [Version Compatibility](#version-compatibility), abd vLLM-MindSpore Plugin requires the following installation sequence:
+  If users require custom modifications to dependent components such as vLLM, MindSpore, Golden Stick, or MSAdapter, they can prepare the modified installation packages locally and perform manual installation in a specific sequence. The installation sequence requirements are as follows:
 
   1. Install vLLM
 
diff --git a/docs/vllm_mindspore/docs/source_en/getting_started/quick_start/quick_start.md b/docs/vllm_mindspore/docs/source_en/getting_started/quick_start/quick_start.md
index e425c661a4..ef7199fd83 100644
--- a/docs/vllm_mindspore/docs/source_en/getting_started/quick_start/quick_start.md
+++ b/docs/vllm_mindspore/docs/source_en/getting_started/quick_start/quick_start.md
@@ -140,13 +140,11 @@ Here is an explanation of these environment variables:
 
 - `vLLM_MODEL_BACKEND`: The backend of the model to run. User could find supported models and backends for vLLM-MindSpore Plugin in the [Model Support List](../../user_guide/supported_models/models_list/models_list.md).
 - `MINDFORMERS_MODEL_CONFIG`: The model configuration file. User can find the corresponding YAML file in the [MindSpore Transformers repository](https://gitee.com/mindspore/mindformers/tree/master/research/qwen2_5). For Qwen2.5-7B, the YAML file is [predict_qwen2_5_7b_instruct.yaml](https://gitee.com/mindspore/mindformers/blob/master/research/qwen2_5/predict_qwen2_5_7b_instruct.yaml).
 
-Additionally, users need to ensure that MindSpore Transformers is installed. Users can add it by running the following command:
+Additionally, users need to ensure that MindSpore Transformers is installed. Users can add MindSpore Transformers to the Python path with the following command:
 
 ```bash
 export PYTHONPATH=/path/to/mindformers:$PYTHONPATH
-```
-
-This will include MindSpore Transformers in the Python path.
+```
 
 ### Offline Inference
 
@@ -209,7 +207,7 @@ INFO: Application startup complete.
 Additionally, performance metrics will be logged, such as:
 
 ```text
-Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gereration throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
+Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
 ```
 
 #### Sending Requests
diff --git a/docs/vllm_mindspore/docs/source_en/getting_started/tutorials/deepseek_parallel/deepseek_r1_671b_w8a8_dp4_tp4_ep4.md b/docs/vllm_mindspore/docs/source_en/getting_started/tutorials/deepseek_parallel/deepseek_r1_671b_w8a8_dp4_tp4_ep4.md
index 81aee810d3..ddb581a80e 100644
--- a/docs/vllm_mindspore/docs/source_en/getting_started/tutorials/deepseek_parallel/deepseek_r1_671b_w8a8_dp4_tp4_ep4.md
+++ b/docs/vllm_mindspore/docs/source_en/getting_started/tutorials/deepseek_parallel/deepseek_r1_671b_w8a8_dp4_tp4_ep4.md
@@ -115,13 +115,11 @@ Environment variable descriptions:
 
 - `ASCEND_RT_VISIBLE_DEVICES`: Configure the available device IDs for each node. Use the `npu-smi info` command to check.
 - `VLLM_MS_MODEL_BACKEND`: The backend of the model to run. Currently supported models and backends for vLLM-MindSpore Plugin can be found in the [Model Support List](../../../user_guide/supported_models/models_list/models_list.md).
 
-Additionally, users need to ensure that MindSpore Transformers is installed. Users can add it by running the following command:
+Additionally, users need to ensure that MindSpore Transformers is installed. Users can add MindSpore Transformers to the Python path with the following command:
 
 ```bash
 export PYTHONPATH=/path/to/mindformers:$PYTHONPATH
-```
-
-This will include MindSpore Transformers in the Python path.
+```
 
 ### Starting Ray for Multi-Node Cluster Management
 
@@ -302,7 +300,7 @@ vllm-mindspore serve
    --additional-config '{"expert_parallel": [EP Parallelism Degree]}'
 ```
 
-`data-parallel-size` and `tensor-parallel-size` specify the parallel policies for the attn and ffn-dense parts, and `expert_parallel` specifies the parallel policies for the routing experts in the moe part. And it must satisfy that `data-parallel-size * tensor-parallel-size` is divisible by `expert_parallel`.
+`data-parallel-size` and `tensor-parallel-size` specify the parallel policies for the attn and ffn-dense parts, and `expert_parallel` specifies the parallel policies for the routing experts in the MoE part. The value of `data-parallel-size * tensor-parallel-size` must be divisible by `expert_parallel`.
 
 User can also set the local model path by `--model` argument. The following is an execution example:
 
diff --git a/docs/vllm_mindspore/docs/source_zh_cn/getting_started/quick_start/quick_start.md b/docs/vllm_mindspore/docs/source_zh_cn/getting_started/quick_start/quick_start.md
index fa11bfc813..97c216f37d 100644
--- a/docs/vllm_mindspore/docs/source_zh_cn/getting_started/quick_start/quick_start.md
+++ b/docs/vllm_mindspore/docs/source_zh_cn/getting_started/quick_start/quick_start.md
@@ -207,7 +207,7 @@ INFO: Application startup complete.
 另外，日志中还会打印服务的性能数据信息，如：
 
 ```text
-Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gereration throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
+Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
 ```
 
 #### 发送请求
diff --git a/docs/vllm_mindspore/docs/source_zh_cn/getting_started/tutorials/qwen2.5_7b_singleNPU/qwen2.5_7b_singleNPU.md b/docs/vllm_mindspore/docs/source_zh_cn/getting_started/tutorials/qwen2.5_7b_singleNPU/qwen2.5_7b_singleNPU.md
index 973a64154c..1b06ec4b9d 100644
--- a/docs/vllm_mindspore/docs/source_zh_cn/getting_started/tutorials/qwen2.5_7b_singleNPU/qwen2.5_7b_singleNPU.md
+++ b/docs/vllm_mindspore/docs/source_zh_cn/getting_started/tutorials/qwen2.5_7b_singleNPU/qwen2.5_7b_singleNPU.md
@@ -204,7 +204,7 @@ INFO: Application startup complete.
 另外，日志中还会打印出服务的性能数据信息，如：
 
 ```text
-Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gereration throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
+Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
 ```
 
 ### 发送请求
diff --git a/tutorials/source_en/model_infer/ms_infer/ms_infer_model_serving_infer.md b/tutorials/source_en/model_infer/ms_infer/ms_infer_model_serving_infer.md
index 9ff91c03f2..4051064360 100644
--- a/tutorials/source_en/model_infer/ms_infer/ms_infer_model_serving_infer.md
+++ b/tutorials/source_en/model_infer/ms_infer/ms_infer_model_serving_infer.md
@@ -152,7 +152,7 @@ export PYTHONPATH=/path/to/mindformers:$PYTHONPATH
 
 vLLM-MindSpore Plugin supports online inference deployment with the OpenAI API protocol. Users can run the following command to start the vLLM-MindSpore Plugin online inference service:
 
 ```bash
-vllm-mindspore serve --model=/path/to/model/Qwen2-7B --trust_remote_code --max-num-seqs=256 --max_model_len=32768 --max-num-batched-tokens=4096 --block_size=128 --gpu-memory-utilization=0.9
+vllm-mindspore serve --model=/path/to/model/Qwen2-7B --trust_remote_code --max-num-seqs=256 --max-model-len=32768 --max-num-batched-tokens=4096 --block_size=128 --gpu-memory-utilization=0.9
 ```
 
 User can also set the local model path by `--model` argument. If the service starts successfully, similar output will be obtained:
 
@@ -166,7 +166,7 @@ INFO: Application startup complete.
 Additionally, performance metrics will be logged, such as:
 
 ```text
-Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gereration throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
+Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
 ```
 
 ### Sending a Request
diff --git a/tutorials/source_zh_cn/model_infer/ms_infer/ms_infer_model_serving_infer.md b/tutorials/source_zh_cn/model_infer/ms_infer/ms_infer_model_serving_infer.md
index 1c48689cf2..349bf7c044 100644
--- a/tutorials/source_zh_cn/model_infer/ms_infer/ms_infer_model_serving_infer.md
+++ b/tutorials/source_zh_cn/model_infer/ms_infer/ms_infer_model_serving_infer.md
@@ -153,7 +153,7 @@ export PYTHONPATH=/path/to/mindformers:$PYTHONPATH
 
 vLLM-MindSpore插件可使用OpenAI的API协议，进行在线推理部署。执行如下命令，启动vLLM-MindSpore插件的在线推理服务：
 
 ```bash
-vllm-mindspore serve --model=/path/to/model/Qwen2-7B --trust_remote_code --max-num-seqs=256 --max_model_len=32768 --max-num-batched-tokens=4096 --block_size=128 --gpu-memory-utilization=0.9
+vllm-mindspore serve --model=/path/to/model/Qwen2-7B --trust_remote_code --max-num-seqs=256 --max-model-len=32768 --max-num-batched-tokens=4096 --block_size=128 --gpu-memory-utilization=0.9
 ```
 
 用户可以通过`--model`参数，指定模型保存的本地路径。若服务成功启动，则可以获得类似的执行结果：
 
@@ -167,7 +167,7 @@ INFO: Application startup complete.
 另外，日志中还会打印服务的性能数据信息，如：
 
 ```text
-Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gereration throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
+Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 0.0%
 ```
 
 ### 发送请求
-- 
Gitee