From 1cc4e5eabce6aefa3649331b93fa3dfb38d850cf Mon Sep 17 00:00:00 2001
From: z00919396
Date: Thu, 20 Mar 2025 20:56:45 +0800
Subject: [PATCH] add cpu bind patch

---
Review notes (ignored by git-am):
- execute_command() uses subprocess.run() so the timeout really kills a
  hung tool instead of blocking in Popen.__exit__.
- get_numa_map() drops the dead shared_mode/NUMA-count code path; the
  effective core split is unchanged.
- bind_cpu() is best-effort: failures are logged, not raised, so a host
  without npu-smi can still start the worker.
- The blob ids on the "index" lines are carried over from the original
  submission and do not match the revised content.

 cpu-bind-optimization-v066-post1.patch | 114 ++++++++++++++++++++++++++
 vllm.spec                              |  11 ++-
 2 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 cpu-bind-optimization-v066-post1.patch

diff --git a/cpu-bind-optimization-v066-post1.patch b/cpu-bind-optimization-v066-post1.patch
new file mode 100644
index 0000000..7f78be9
--- /dev/null
+++ b/cpu-bind-optimization-v066-post1.patch
@@ -0,0 +1,114 @@
+diff --git a/vllm-0.6.6.post1/vllm/worker/worker.py b/vllm-0.6.6.post1/vllm/worker/worker.py
+index f51b51d..fc5943a 100644
+--- a/vllm-0.6.6.post1/vllm/worker/worker.py
++++ b/vllm-0.6.6.post1/vllm/worker/worker.py
+@@ -31,6 +31,101 @@ from vllm.worker.worker_base import (LocalOrDistributedWorkerBase, WorkerBase,
+ 
+ logger = init_logger(__name__)
+ 
++import subprocess
++import psutil
++
++
++def execute_command(cmd_list):
++    """Run ``cmd_list`` without a shell and return its decoded stdout.
++
++    Raises:
++        RuntimeError: if the executable is missing or the command does
++            not finish within the timeout.
++    """
++    try:
++        # subprocess.run() kills the child when the timeout expires, so a
++        # hung tool cannot block the worker forever.
++        proc = subprocess.run(cmd_list, shell=False,
++                              stdout=subprocess.PIPE,
++                              stderr=subprocess.PIPE, timeout=1000)
++    except (FileNotFoundError, subprocess.TimeoutExpired) as e:
++        raise RuntimeError(
++            f"Failed to execute command, because {e}.") from e
++    return proc.stdout.decode()
++
++
++def get_numa_map():
++    """Map each NPU index to the ``[start, end)`` CPU cores to bind.
++
++    Reads each NPU's CPU range from ``npu-smi info -t topo`` and the CPU
++    count from ``lscpu``. NPUs that report the same range split the
++    selected core window evenly between them.
++    """
++    numa_topo_out = execute_command(
++        ["npu-smi", "info", "-t", "topo"]).strip().split("\n")
++
++    max_cpu = 0
++    for val in execute_command(["lscpu"]).strip().split("\n"):
++        if val.startswith("CPU(s):"):
++            max_cpu = int(val.split(" ")[-1]) - 1
++            break
++
++    npu_no = 0
++    npu_max_cpu_no = 0
++    numa_to_npu_map = {}
++    for line_no, val in enumerate(numa_topo_out, start=1):
++        line = ''.join(val.split())
++        # The first row is skipped; NPU rows start with "NPU".
++        if line.startswith("NPU") and line_no > 1:
++            # NOTE(review): fixed offset of the CPU range inside the
++            # whitespace-stripped row; confirm against npu-smi output.
++            cpu_range = line[33:]
++            npu_max_cpu_no = max(npu_max_cpu_no,
++                                 int(cpu_range.split("-")[1]))
++            numa_to_npu_map.setdefault(cpu_range, []).append(npu_no)
++            npu_no += 1
++
++    # True when the highest CPU in any NPU range is the last CPU of the
++    # host; the core window is then shifted down instead of up.
++    npu_max_cpu = npu_max_cpu_no == max_cpu
++
++    npu_to_core_map = {}
++    for key, val in numa_to_npu_map.items():
++        cpu_range = key.split("-")
++        total_core_num = int(cpu_range[1]) - int(cpu_range[0]) + 1
++        if npu_max_cpu:
++            core_start = int(cpu_range[0]) - total_core_num
++        else:
++            core_start = int(cpu_range[0]) + total_core_num
++        # Every NPU sharing this range gets an equal slice of the window.
++        core_num_per_npu = total_core_num // len(val)
++        for npu in val:
++            npu_to_core_map[npu] = [core_start,
++                                    core_start + core_num_per_npu]
++            core_start += core_num_per_npu
++
++    return npu_to_core_map
++
++
++def bind_cpu(rank):
++    """Pin the current process to the CPU cores selected for ``rank``.
++
++    Binding is a best-effort optimization: on failure a warning is logged
++    and the process keeps its current affinity.
++    """
++    try:
++        core_start, core_end = get_numa_map()[rank]
++        cpu_list = list(range(core_start, core_end))
++        current_process = psutil.Process()
++        current_process.cpu_affinity(cpu_list)
++    except (RuntimeError, LookupError, ValueError, OSError,
++            psutil.Error) as e:
++        logger.warning("skip cpu binding for rank%s: %s", rank, e)
++        return
++
++    logger.info("bind process %s in rank%s to cpu: %s",
++                current_process.pid, rank, cpu_list)
++
+ 
+ class Worker(LocalOrDistributedWorkerBase):
+     """A worker class that executes (a partition of) the model on a GPU.
+@@ -53,6 +148,7 @@ class Worker(LocalOrDistributedWorkerBase):
+         self.parallel_config.rank = rank
+         self.local_rank = local_rank
+         self.rank = rank
++        bind_cpu(local_rank)
+         self.distributed_init_method = distributed_init_method
+         self.is_driver_worker = is_driver_worker
+         if is_driver_worker:
diff --git a/vllm.spec b/vllm.spec
index be80680..0707c56 100644
--- a/vllm.spec
+++ b/vllm.spec
@@ -3,12 +3,14 @@
 
 Name: vllm
 Version: 0.6.6.post1
-Release: 1
+Release: 2
 Summary: Powerful engine for LLMs
 License: (Apache-2.0 AND BSD-3-Clause) OR BSD-3-CLause
 URL: https://github.com/vllm-project/vllm
 Source0: https://gitee.com/src-openeuler/vllm/raw/master/vllm-%{version}.tar.gz
 
+Patch0: cpu-bind-optimization-v066-post1.patch
+
 BuildArch: noarch
 
 %description
@@ -28,6 +30,7 @@ Buildrequires: python3-pytorch
 
 %prep
 %autosetup -n %{name}-%{version} -N
+%patch -P0 -p2
 
 %build
 export VLLM_TARGET_DEVICE=empty
@@ -60,5 +63,11 @@ mv %{buildroot}/filelist.lst .
 %files -n python3-%{_name} -f filelist.lst
 
 %changelog
+* Thu Mar 20 2025 zhurui - 0.6.6.post1-2
+- Type:enhancement
+- ID:NA
+- SUG:NA
+- DESC:optimize cpu bind
+
 * Fri Feb 28 2025 renwenjie - 0.6.6.post1-1
 - Package init
-- 
Gitee