From 1cc4e5eabce6aefa3649331b93fa3dfb38d850cf Mon Sep 17 00:00:00 2001
From: z00919396 <zhurui10@huawei.com>
Date: Thu, 20 Mar 2025 20:56:45 +0800
Subject: [PATCH] add cpu bind patch

---
 cpu-bind-optimization-v066-post1.patch | 96 ++++++++++++++++++++++++++
 vllm.spec                              | 11 ++-
 2 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 cpu-bind-optimization-v066-post1.patch

diff --git a/cpu-bind-optimization-v066-post1.patch b/cpu-bind-optimization-v066-post1.patch
new file mode 100644
index 0000000..7f78be9
--- /dev/null
+++ b/cpu-bind-optimization-v066-post1.patch
@@ -0,0 +1,96 @@
+diff --git a/vllm-0.6.6.post1/vllm/worker/worker.py b/vllm-0.6.6.post1/vllm/worker/worker.py
+index f51b51d..fc5943a 100644
+--- a/vllm-0.6.6.post1/vllm/worker/worker.py
++++ b/vllm-0.6.6.post1/vllm/worker/worker.py
+@@ -31,6 +31,83 @@ from vllm.worker.worker_base import (LocalOrDistributedWorkerBase, WorkerBase,
+ 
+ logger = init_logger(__name__)
+ 
++import subprocess
++import psutil
++
++def execute_command(cmd_list):
++    """Run cmd_list without a shell and return its decoded stdout; RuntimeError if it is not found."""
++    try:
++        # run() kills the child when the timeout expires; stderr is captured only to keep it quiet.
++        proc = subprocess.run(cmd_list, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=1000)
++        return proc.stdout.decode()
++    except FileNotFoundError as e:
++        raise RuntimeError(f"Failed to execute command, because {e}.") from e
++
++
++def get_numa_map():
++    """Map every NPU index to the CPU core range its worker process should be bound to.
++
++    CPU affinity ranges are read from ``npu-smi info -t topo`` and the CPU count from ``lscpu``.
++    Each NPU gets cores from the equally sized block adjacent to its affinity range: the block
++    above it, or the block below it when the affinity ranges already reach the last CPU.
++    NPUs reporting the same affinity range split that block evenly, in row order.
++
++    Returns:
++        dict mapping NPU index to ``[core_start, core_end]``, with ``core_end`` exclusive.
++
++    Raises:
++        RuntimeError: if ``npu-smi`` or ``lscpu`` cannot be executed.
++    """
++    numa_topo_out = execute_command(["npu-smi", "info", "-t", "topo"]).strip().split("\n")
++
++    # Only the "CPU(s):" line of lscpu is needed.  The NUMA node count is not parsed: it was never
++    # used, and int() would fail on lscpu versions that print a bare "NUMA:" section header.
++    max_cpu = 0
++    for val in execute_command(["lscpu"]).strip().split("\n"):
++        if val.startswith("CPU(s):"):
++            max_cpu = int(val.split()[-1]) - 1
++            break
++
++    numa_to_npu_map = {}
++    npu_no = 0
++    npu_max_cpu_no = 0
++    for val in numa_topo_out[1:]:  # the first line is the column header
++        fields = val.split()
++        if not fields or not fields[0].startswith("NPU"):
++            continue
++        # The affinity is the last column; a fixed character offset only fits an 8-NPU table.
++        cpu_range = fields[-1]
++        first, _, last = cpu_range.partition("-")
++        if first.isdigit() and last.isdigit():
++            npu_max_cpu_no = max(npu_max_cpu_no, int(last))
++            numa_to_npu_map.setdefault(cpu_range, []).append(npu_no)
++        npu_no += 1  # a row without a usable range still consumes an NPU index
++
++    # If the affinity ranges end at the last CPU there is no block above them; use the one below.
++    npu_max_cpu = npu_max_cpu_no == max_cpu
++
++    npu_to_core_map = {}
++    for key, val in numa_to_npu_map.items():
++        first, last = (int(part) for part in key.split("-"))
++        total_core_num = last - first + 1
++        core_start = first - total_core_num if npu_max_cpu else first + total_core_num
++        # The block is always split evenly; a lone NPU in its range simply gets the whole block.
++        core_num_per_npu = total_core_num // len(val)
++        for npu in val:
++            npu_to_core_map[npu] = [core_start, core_start + core_num_per_npu]
++            core_start += core_num_per_npu
++
++    return npu_to_core_map
++
++def bind_cpu(rank):
++    """Best-effort: pin this process to the CPU cores mapped to NPU `rank`; never raises."""
++    try:
++        start, end = get_numa_map()[rank]  # half-open core range [start, end)
++        proc, cpu_list = psutil.Process(), list(range(start, end))
++        proc.cpu_affinity(cpu_list)
++        logger.info("bind process %s in rank%s to cpu: %s", proc.pid, rank, cpu_list)
++    except Exception as e:  # binding is only an optimization; it must not abort worker start-up
++        logger.warning("skip cpu binding for rank%s: %s", rank, e)
+ 
+ class Worker(LocalOrDistributedWorkerBase):
+     """A worker class that executes (a partition of) the model on a GPU.
+@@ -53,6 +130,7 @@ class Worker(LocalOrDistributedWorkerBase):
+         self.parallel_config.rank = rank
+         self.local_rank = local_rank
+         self.rank = rank
++        bind_cpu(local_rank)
+         self.distributed_init_method = distributed_init_method
+         self.is_driver_worker = is_driver_worker
+         if is_driver_worker:
diff --git a/vllm.spec b/vllm.spec
index be80680..0707c56 100644
--- a/vllm.spec
+++ b/vllm.spec
@@ -3,12 +3,14 @@
 
 Name:       vllm
 Version:    0.6.6.post1
-Release:    1
+Release:    2
 Summary:    Powerful engine for LLMs
 License:    (Apache-2.0 AND BSD-3-Clause) OR BSD-3-CLause
 URL:        https://github.com/vllm-project/vllm
 Source0:    https://gitee.com/src-openeuler/vllm/raw/master/vllm-%{version}.tar.gz
 
+Patch0:  cpu-bind-optimization-v066-post1.patch
+
 BuildArch:  noarch
 
 %description
@@ -28,6 +30,7 @@ Buildrequires:  python3-pytorch
 
 %prep
 %autosetup -n %{name}-%{version} -N
+%patch -P0 -p2
 
 %build
 export VLLM_TARGET_DEVICE=empty
@@ -60,5 +63,11 @@ mv %{buildroot}/filelist.lst .
 %files -n python3-%{_name} -f filelist.lst
 
 %changelog
+* Thu Mar 20 2025 zhurui<zhurui10@huawei.com> - 0.6.6.post1-2
+- Type:enhancement
+- ID:NA
+- SUG:NA
+- DESC:optimize cpu bind
+
 * Fri Feb 28 2025 renwenjie<renwenjie5@huawei.com> - 0.6.6.post1-1
 - Package init
-- 
Gitee