From e0f58877ab5789a005ad10186a7f4a8e946da1e5 Mon Sep 17 00:00:00 2001 From: Shao Zhiyu Date: Thu, 9 Oct 2025 07:15:56 +0000 Subject: [PATCH 1/5] sw64: pci: remove redundant debug control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- The 'CONFIG_PCI_DEBUG' check in the sunway pci controller driver is not functional, as the kernel recognizes pci controllers as devices, so remove the old debug check and use 'dev_dbg()' instead. This commit also enables 'CONFIG_DYNAMIC_DEBUG' to control debugging in the sunway pci controller driver, and developers can now use the parameter dyndbg="file [FILE] +p" on the kernel command line for runtime debugging. Signed-off-by: Shao Zhiyu Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- drivers/pci/controller/pci-sunway.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/pci/controller/pci-sunway.c b/drivers/pci/controller/pci-sunway.c index d8e291decf65..e3487e2ea43c 100644 --- a/drivers/pci/controller/pci-sunway.c +++ b/drivers/pci/controller/pci-sunway.c @@ -562,11 +562,10 @@ static int pci_read_rc_cfg(struct pci_bus *bus, unsigned int devfn, struct pci_controller *hose = pci_bus_to_pci_controller(bus); void __iomem *cfg_iobase = hose->rc_config_space_base; - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t", - cfg_iobase + ((where & ~3) << 5), - bus->number, - devfn, where, size); + dev_dbg(&bus->dev, "rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t", + cfg_iobase + ((where & ~3) << 5), + bus->number, + devfn, where, size); if ((uintptr_t)where & (size - 1)) { *val = 0; @@ -596,8 +595,7 @@ static int pci_read_rc_cfg(struct pci_bus *bus, unsigned int devfn, break; } - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("*val %#x\n ", *val); + dev_dbg(&bus->dev, "*val %#x\n ", *val); return PCIBIOS_SUCCESSFUL; } @@ -632,11 +630,10 @@ static int pci_write_rc_cfg(struct pci_bus *bus, unsigned int devfn, break; } - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("rc write addr:%px bus %d, devfn %#x, where %#x *val %#x size %d\n", - cfg_iobase + ((where & ~3) << 5), - bus->number, - devfn, where, val, size); + dev_dbg(&bus->dev, "rc write addr:%px bus %d, devfn %#x, where %#x *val %#x size %d\n", + cfg_iobase + ((where & ~3) << 5), + bus->number, + devfn, where, val, size); writel(data, cfg_iobase + ((where & ~3) << 5)); @@ -762,9 +759,8 @@ void __iomem *sunway_pci_map_bus(struct pci_bus *bus, cfg_iobase = hose->ep_config_space_base + relbus; - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("addr:%px bus %d, devfn %d, where %d\n", - cfg_iobase, bus->number, devfn, where); + dev_dbg(&bus->dev, "addr:%px bus %d, devfn %d, where %d\n", + cfg_iobase, bus->number, devfn, where); return cfg_iobase; } EXPORT_SYMBOL(sunway_pci_map_bus); -- Gitee From 8af49cb59e51e5e9865cfc678671a08d0255a019 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 30 Oct 2025 17:09:24 +0800 Subject: [PATCH 2/5] sw64: kvm: add pv steal time support Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- Introduce a per-cpu structure pvclock_steal_time to store the steal time for each vcpu. When the VM exits with a KVM_EXIT_SET_PVTIME_ST exception, it passes the address of struct kvm_steal_time and informs the hypervisor to enable steal time.
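The shared structure is protected by a simple even/odd version protocol: the host bumps 'version' to an odd value before updating 'steal_time' and back to an even value afterwards. As an illustrative sketch only (the in-tree guest reader, para_steal_clock(), is added by a later patch in this series and uses virt_rmb()), a consumer of this layout is expected to read it roughly like this:

	/* Illustrative guest-side reader for the structure updated here. */
	static u64 read_steal_time(struct pvclock_vcpu_steal_time *st)
	{
		u32 version;
		u64 steal;

		do {
			version = READ_ONCE(st->version);
			smp_rmb();	/* read steal_time only after version */
			steal = READ_ONCE(st->steal_time);
			smp_rmb();	/* re-check version after steal_time */
		} while ((version & 1) || version != READ_ONCE(st->version));

		return steal;
	}
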
Additionally, KVM_REQ_RECORD_STEAL is added to handle updates of the steal time of vcpu. Signed-off-by: Chen Wang Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/kvm_asm.h | 3 +- arch/sw_64/include/asm/kvm_host.h | 21 +++++++++ arch/sw_64/include/asm/pvtime.h | 16 +++++++ arch/sw_64/kvm/Makefile | 3 +- arch/sw_64/kvm/handle_exit.c | 4 ++ arch/sw_64/kvm/pvtime.c | 78 +++++++++++++++++++++++++++++++ arch/sw_64/kvm/sw64.c | 8 ++++ 7 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 arch/sw_64/include/asm/pvtime.h create mode 100644 arch/sw_64/kvm/pvtime.c diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h index a8e8ef3d68a0..f714f5ea12ae 100644 --- a/arch/sw_64/include/asm/kvm_asm.h +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -14,9 +14,9 @@ #define SW64_KVM_EXIT_RESTART 17 #define SW64_KVM_EXIT_APT_FAULT 18 #define SW64_KVM_EXIT_FATAL_ERROR 22 +#define SW64_KVM_EXIT_SET_PVTIME_ST 23 #define SW64_KVM_EXIT_DEBUG 24 - #define kvm_sw64_exception_type \ {0, "HOST_INTR" }, \ {1, "IO" }, \ @@ -28,6 +28,7 @@ {17, "RESTART" }, \ {18, "APT_FAULT" }, \ {22, "FATAL_ERROR" }, \ + {23, "SET_PVTIME_ST" }, \ {24, "DEBUG" } diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index 7dd763a7d4c2..33df07b9a665 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -73,6 +73,8 @@ #define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(0) + struct kvm_arch_memory_slot { }; @@ -121,6 +123,12 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; + + /* Guest steal-time state */ + struct { + gpa_t base; + u64 last_steal; + } steal; }; struct vmem_info { @@ -216,4 +224,17 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } +void kvm_init_steal_time(struct kvm_vcpu *vcpu); +void kvm_sw64_record_steal_time(struct kvm_vcpu *vcpu); + +static inline void kvm_sw64_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->steal.base = INVALID_GPA; + vcpu_arch->steal.last_steal = 0; +} + +static inline bool kvm_sw64_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->steal.base != INVALID_GPA); +} #endif /* _ASM_SW64_KVM_HOST_H */ diff --git a/arch/sw_64/include/asm/pvtime.h b/arch/sw_64/include/asm/pvtime.h new file mode 100644 index 000000000000..14a52a0e493b --- /dev/null +++ b/arch/sw_64/include/asm/pvtime.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 SW64 Ltd. 
*/ + +#ifndef __ASM_PVTIME_ABI_H +#define __ASM_PVTIME_ABI_H + +/* The below structure is defined in SW64 */ + +struct pvclock_vcpu_steal_time { + __u64 steal_time; + __u32 version; + /* Structure must be 128 byte aligned, pad to that size */ + __u32 padding[29]; +} __packed; + +#endif diff --git a/arch/sw_64/kvm/Makefile b/arch/sw_64/kvm/Makefile index 530170e9167d..22f3194ad477 100644 --- a/arch/sw_64/kvm/Makefile +++ b/arch/sw_64/kvm/Makefile @@ -9,7 +9,8 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o -kvm-y += sw64.o emulate.o mmio.o kvm_timer.o handle_exit.o perf.o +kvm-y += sw64.o emulate.o mmio.o kvm_timer.o handle_exit.o perf.o \ + pvtime.o kvm-$(CONFIG_SUBARCH_C3B) += kvm_core3.o entry_core3.o kvm-$(CONFIG_SUBARCH_C4) += kvm_core4.o mmu.o entry_core4.o diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 8c6c7325ccfc..3a623c59895a 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -73,6 +73,10 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; vcpu->run->hw.hardware_exit_reason = hargs->arg0; return 0; + case SW64_KVM_EXIT_SET_PVTIME_ST: + vcpu->arch.steal.base = hargs->arg0; + kvm_init_steal_time(vcpu); + return 1; } return 1; diff --git a/arch/sw_64/kvm/pvtime.c b/arch/sw_64/kvm/pvtime.c new file mode 100644 index 000000000000..767617d4c5e0 --- /dev/null +++ b/arch/sw_64/kvm/pvtime.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 SW64 Ltd. + +#include +#include +#include + +void kvm_sw64_record_steal_time(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + u64 last_steal = vcpu->arch.steal.last_steal; + u64 __user *steal_ptr; + u32 __user *version_ptr; + u64 offset_s = offsetof(struct pvclock_vcpu_steal_time, steal_time); + u64 offset_v = offsetof(struct pvclock_vcpu_steal_time, version); + u64 steal, stealInc, hva; + u32 version; + gfn_t gfn; + int idx; + + if (base == INVALID_GPA) + return; + + idx = srcu_read_lock(&kvm->srcu); + gfn = base >> PAGE_SHIFT; + hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); + if (WARN_ON(kvm_is_error_hva(hva))) { + vcpu->arch.steal.base = INVALID_GPA; + return; + } + + steal_ptr = (__u64 __user *)(hva + offset_in_page(base) + offset_s); + version_ptr = (__u32 __user *)(hva + offset_in_page(base) + offset_v); + + if (WARN_ON(get_user(version, version_ptr))) + return; + + version += 1; + + if (WARN_ON(put_user(version, version_ptr))) + return; + + if (!WARN_ON(get_user(steal, steal_ptr))) { + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); + stealInc += vcpu->arch.steal.last_steal - last_steal; + if (stealInc) { + steal += stealInc; + WARN_ON(put_user(steal, steal_ptr)); + } + } + + version += 1; + WARN_ON(put_user(version, version_ptr)); + + kvm_vcpu_mark_page_dirty(vcpu, gfn); + srcu_read_unlock(&kvm->srcu, idx); +} + +void kvm_init_steal_time(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_steal_time init_values = {}; + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + int idx; + + if (base == INVALID_GPA) + return; + + /* + * Start counting stolen time from the time the guest + * requests to set pvtime. 
+ */ + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + idx = srcu_read_lock(&kvm->srcu); + kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); + srcu_read_unlock(&kvm->srcu, idx); +} diff --git a/arch/sw_64/kvm/sw64.c b/arch/sw_64/kvm/sw64.c index 73c547fb674c..0af03153cb22 100644 --- a/arch/sw_64/kvm/sw64.c +++ b/arch/sw_64/kvm/sw64.c @@ -122,6 +122,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) tbivpn(0, 0, vpn); } + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_sw64_record_steal_time(vcpu); + if (kvm_dirty_ring_check_request(vcpu)) return 0; } @@ -234,6 +237,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.tsk = current; vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ + kvm_sw64_pvtime_vcpu_init(&vcpu->arch); + return 0; } @@ -281,6 +286,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; + + if (kvm_sw64_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- Gitee From fde74986ec2ae11e3fefb73a83b56fac53468b74 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 15 Oct 2025 08:56:02 +0800 Subject: [PATCH 3/5] sw64: paravirt: add pv steal_time support in guest side Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- Add a new hcall HCALL_SET_PVTIME_ST to pass the GPA of guest steal time structure to hypervisor. Meanwhile, para_steal_clock() function is used to read the amount of time stolen from the current vcpu. Signed-off-by: Chen Wang Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 11 +++++ arch/sw_64/include/asm/hcall.h | 1 + arch/sw_64/include/asm/paravirt.h | 16 ++++--- arch/sw_64/kernel/paravirt.c | 76 +++++++++++++++++++++++++++++++ arch/sw_64/kernel/time.c | 3 ++ 5 files changed, 101 insertions(+), 6 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 75b3cf116dc8..583da0bc4a20 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -475,6 +475,17 @@ config PARAVIRT under a hypervisor, potentially improving performance significantly over full virtualization. +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account + for that, there can be a small performance impact. + + If in doubt, say N here. 
+ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h index 40eab985fcbd..04a853dc3938 100644 --- a/arch/sw_64/include/asm/hcall.h +++ b/arch/sw_64/include/asm/hcall.h @@ -18,6 +18,7 @@ enum HCALL_TYPE { HCALL_SWNET = 20, /* guest request swnet service */ HCALL_SWNET_IRQ = 21, /* guest request swnet intr */ HCALL_FATAL_ERROR = 22, /* guest fatal error, issued by hmcode */ + HCALL_SET_PVTIME_ST = 23, /* guest pvtime set gpa */ NR_HCALL }; diff --git a/arch/sw_64/include/asm/paravirt.h b/arch/sw_64/include/asm/paravirt.h index 0ca6befc53ee..2b1ab0a4eb35 100644 --- a/arch/sw_64/include/asm/paravirt.h +++ b/arch/sw_64/include/asm/paravirt.h @@ -3,14 +3,12 @@ #define _ASM_SW64_PARAVIRT_H #ifdef CONFIG_PARAVIRT +#include + struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops { - unsigned long long (*steal_clock)(int cpu); -}; - struct pv_lock_ops { void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); @@ -20,17 +18,22 @@ struct pv_lock_ops { }; struct paravirt_patch_template { - struct pv_time_ops time; struct pv_lock_ops lock; }; extern struct paravirt_patch_template pv_ops; +u64 dummy_steal_clock(int cpu); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + static inline u64 paravirt_steal_clock(int cpu) { - return pv_ops.time.steal_clock(cpu); + return static_call(pv_steal_clock)(cpu); } +int __init pv_steal_time_init(void); + __visible bool __native_vcpu_is_preempted(int cpu); static inline bool pv_vcpu_is_preempted(int cpu) @@ -66,6 +69,7 @@ static inline void pv_queued_spin_unlock(struct qspinlock *lock) #else #define pv_qspinlock_init() do {} while (0) +#define pv_steal_time_init() do {} while (0) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/sw_64/kernel/paravirt.c b/arch/sw_64/kernel/paravirt.c index e22a718fc525..4b2e0f44f709 100644 --- a/arch/sw_64/kernel/paravirt.c +++ b/arch/sw_64/kernel/paravirt.c @@ -9,8 +9,11 @@ #include #include #include +#include +#include #include +#include #include struct static_key paravirt_steal_enabled; @@ -56,3 +59,76 @@ void __init pv_qspinlock_init(void) pv_ops.lock.kick = NULL; } #endif + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +static DEFINE_PER_CPU(struct pvclock_vcpu_steal_time, pvclock_steal_time) __aligned(128); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +/* return steal time in ns */ +static u64 para_steal_clock(int cpu) +{ + struct pvclock_vcpu_steal_time *st = per_cpu_ptr(&pvclock_steal_time, cpu); + u64 steal; + int version; + + do { + version = READ_ONCE(st->version); + virt_rmb(); + steal = READ_ONCE(st->steal_time); + virt_rmb(); + } while ((version & 1) || + version != READ_ONCE(st->version)); + + return steal; +} + +static int steal_time_cpu_online(unsigned int cpu) +{ + struct pvclock_vcpu_steal_time *st = this_cpu_ptr(&pvclock_steal_time); + + hcall(HCALL_SET_PVTIME_ST, __pa(st), 0, 0); + + return 0; +} + +static int steal_time_cpu_down_prepare(unsigned int cpu) +{ + hcall(HCALL_SET_PVTIME_ST, INVALID_GPA, 0, 0); + return 0; +} + +int __init pv_steal_time_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "sw64/pvtime:online", + steal_time_cpu_online, + steal_time_cpu_down_prepare); + if (ret < 0) + return ret; + + 
static_call_update(pv_steal_clock, para_steal_clock); + + static_key_slow_inc(&paravirt_steal_enabled); + if (steal_acc) + static_key_slow_inc(&paravirt_steal_rq_enabled); + + pr_info("using paravirt steal time\n"); + + return 0; +} diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index c6cefd4383b5..50037c16210a 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -42,6 +42,9 @@ time_init(void) sw64_setup_timer(); /* Calibrate the delay loop directly */ lpj_fine = cycle_freq / HZ; + + if (is_in_guest()) + pv_steal_time_init(); } void clocksource_arch_init(struct clocksource *cs) -- Gitee From e67b665d03541eb5fc319136a3d074b7286d59eb Mon Sep 17 00:00:00 2001 From: Gu Zitao Date: Tue, 11 Nov 2025 16:58:00 +0800 Subject: [PATCH 4/5] sw64: fix random mmap base range Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- The original random mmap base range was incorrect: it should be 0~256M, but it was set to 0~1024G. So fix it. Signed-off-by: Gu Zitao Reviewed-by: He Sheng --- arch/sw_64/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/mm/mmap.c b/arch/sw_64/mm/mmap.c index d4bf9a7d2627..45b3207d1466 100644 --- a/arch/sw_64/mm/mmap.c +++ b/arch/sw_64/mm/mmap.c @@ -113,7 +113,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long arch_mmap_rnd(void) { - unsigned long rnd = get_random_long() & 0x7fffffful; + unsigned long rnd = get_random_long() & 0x7ffful; return rnd << PAGE_SHIFT; } -- Gitee From 7bf17c7823507f83bafa2980e3d53c7b905b577e Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Wed, 19 Nov 2025 08:29:09 +0800 Subject: [PATCH 5/5] sw64: ebpf: port arch_prepare_bpf_trampoline() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- Port the implementation of the arch_prepare_bpf_trampoline() function for SW64, enabling support for eBPF fentry, fexit, and struct_ops features. We have tested these three features individually using the fentry, fexit, and dummy test cases from the test_progs program under the tools directory.
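For context, the kind of program this unlocks is the usual libbpf-style fentry/fexit skeleton; a minimal sketch follows (the do_unlinkat attach point is only an example target, not something this patch adds):

	// Minimal fentry program sketch, built against vmlinux.h with libbpf.
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("fentry/do_unlinkat")
	int BPF_PROG(handle_unlink_entry, int dfd, struct filename *name)
	{
		bpf_printk("do_unlinkat entered");
		return 0;
	}

Loading such a program causes the kernel to call arch_prepare_bpf_trampoline() to build the trampoline that bpf_arch_text_poke() then wires into the traced function.
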
Signed-off-by: Jinyu Tang Signed-off-by: Yizhou Chen Tested-by: Yizhou Chen Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit_comp.c | 587 +++++++++++++++++++++++++++++++++- 1 file changed, 585 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index a4f273e7725d..057461689832 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -24,13 +24,18 @@ #include #include #include +#include #include +#include +#include #include "bpf_jit.h" #define TCALL_CNT (MAX_BPF_JIT_REG + 0) - +#define SW64_FENTRY_NINSNS 5 +#define SW64_MAX_REG_ARGS 6 +#define STACK_ALIGN 16 static const int bpf2sw64[] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = SW64_BPF_REG_V0, @@ -61,6 +66,7 @@ struct jit_ctx { int *insn_offset; // [bpf_insn_idx] = jited_insn_idx int exentry_idx; u32 *image; // JITed instruction + u32 *ro_image; u32 stack_size; }; @@ -486,10 +492,11 @@ static int offset_to_epilogue(const struct jit_ctx *ctx) } /* For tail call, jump to set up function call stack */ -#define PROLOGUE_OFFSET 11 +#define PROLOGUE_OFFSET (11 + SW64_FENTRY_NINSNS) static void build_prologue(struct jit_ctx *ctx, bool was_classic) { + int i; const u8 r6 = bpf2sw64[BPF_REG_6]; const u8 r7 = bpf2sw64[BPF_REG_7]; const u8 r8 = bpf2sw64[BPF_REG_8]; @@ -497,6 +504,10 @@ static void build_prologue(struct jit_ctx *ctx, bool was_classic) const u8 fp = bpf2sw64[BPF_REG_FP]; const u8 tcc = bpf2sw64[TCALL_CNT]; + /* nops reserved for fentry call */ + for (i = 0; i < SW64_FENTRY_NINSNS; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + /* Save callee-saved registers */ emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx); emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); @@ -1370,6 +1381,578 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } +static int __patch_insn_write(void *addr, const void *insn, size_t len) +{ + return copy_to_kernel_nofault(addr, insn, len); +} + +int patch_insn_write(void *addr, const void *insn, size_t len) +{ + size_t size; + int ret; + + while (len) { + size = min(len, PAGE_SIZE - offset_in_page(addr)); + + ret = __patch_insn_write(addr, insn, size); + if (ret) + return ret; + + addr += size; + insn += size; + len -= size; + } + + return 0; +} + +int sw64_insn_copy(void *addr, const void *insns, size_t len) +{ + int ret; + + ret = patch_insn_write(addr, insns, len); + if (!ret) { + flush_icache_range((unsigned long)addr, (unsigned long)addr + len); + mb(); + } + + return ret; +} + +static int gen_call_or_nops(void *target, void *ip, u32 *insns, bool is_call) +{ + int i; + s64 offset; + s32 jmp_offset; + struct jit_ctx ctx = { + .image = insns, + .idx = 0, + }; + + if (!target) { + for (i = 0; i < SW64_FENTRY_NINSNS; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, + SW64_BPF_REG_ZR), &ctx); + return 0; + } + offset = (s64)((unsigned long)target - (unsigned long)ip); + if (offset >= -0x100000 && offset <= 0xfffff) { + jmp_offset = (s32)offset; + /* we must remember br in sw is 4 * disp, and -1 is for pc will add 1 when exec */ + jmp_offset = jmp_offset/4 - 1; + emit(SW64_BPF_BR(is_call ? 
SW64_BPF_REG_AT : SW64_BPF_REG_ZR, jmp_offset), &ctx); + } else { + pr_err("bpf-jit: target offset 0x%llx is out of range\n", offset); + return -ERANGE; + } + return 0; +} + +static void set_sw_nops(u32 *insns, int num) +{ + int i; + struct jit_ctx ctx = { + .image = insns, + .idx = 0, + }; + + for (i = 0; i < num; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), &ctx); + + return; + +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + + u32 old_insns[SW64_FENTRY_NINSNS], new_insns[SW64_FENTRY_NINSNS]; + bool is_call = poke_type == BPF_MOD_CALL; + int ret; + + if (!is_kernel_text((unsigned long)ip) && + !is_bpf_text_address((unsigned long)ip)) + return -EOPNOTSUPP; + + set_sw_nops(old_insns, SW64_FENTRY_NINSNS); + set_sw_nops(new_insns, SW64_FENTRY_NINSNS); + + ret = gen_call_or_nops(old_addr, ip, old_insns, is_call); + if (ret) + return ret; + /* if not same, old addr is wrong, maybe change illegal */ + if (memcmp(ip, old_insns, SW64_FENTRY_NINSNS * 4)) + return -EFAULT; + + ret = gen_call_or_nops(new_addr, ip, new_insns, is_call); + if (ret) + return ret; + + cpus_read_lock(); + mutex_lock(&text_mutex); + if (memcmp(ip, new_insns, SW64_FENTRY_NINSNS * 4)) + ret = sw64_insn_copy(ip, new_insns, SW64_FENTRY_NINSNS * 4); + mutex_unlock(&text_mutex); + cpus_read_unlock(); + + return ret; +} + +static int btf_func_model_nregs(const struct btf_func_model *m) +{ + int nregs = m->nr_args; + int i; + + /* extra registers needed for struct argument */ + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + /* The arg_size is at most 16 bytes, enforced by the verifier. */ + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + nregs += (m->arg_size[i] + 7) / 8 - 1; + } + + return nregs; +} + +static void emit_sw64_call(u64 target, struct jit_ctx *ctx) +{ + unsigned long ip = (unsigned long)(ctx->ro_image + ctx->idx); + s64 offset = (s64)((unsigned long)target - (unsigned long)ip); + + if (offset >= -0x100000 && offset <= 0xfffff) { + s32 jmp_offset = (s32)offset; + /* we must remember br in sw is 4 * disp, and -1 is for pc will add 1 when exec */ + jmp_offset = jmp_offset/4 - 1; + emit(SW64_BPF_BR(SW64_BPF_REG_RA, jmp_offset), ctx); + } else { + emit_sw64_load_call_addr(SW64_BPF_REG_PV, target, ctx); + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + } + +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nregs) +{ + int i; + + for (i = 0; i < nregs; i++) { + if (i < SW64_MAX_REG_ARGS) { + emit(SW64_BPF_STL(i + SW64_BPF_REG_A0, SW64_BPF_REG_FP, -args_off), ctx); + } else { + emit(SW64_BPF_LDL(SW64_BPF_REG_T0, + SW64_BPF_REG_FP, 16 + (i - SW64_MAX_REG_ARGS) * 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -args_off), ctx); + } + args_off -= 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nr_reg_args) +{ + int i; + + for (i = 0; i < nr_reg_args; i++) { + emit(SW64_BPF_LDL(i + SW64_BPF_REG_A0, SW64_BPF_REG_FP, -args_off), ctx); + args_off -= 8; + } +} + +static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off, + struct jit_ctx *ctx) +{ + int i; + + for (i = 0; i < nr_stack_args; i++) { + emit(SW64_BPF_LDL(SW64_BPF_REG_T0, + SW64_BPF_REG_FP, -(args_off - SW64_MAX_REG_ARGS * 8)), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -stk_arg_off), ctx); + args_off -= 8; + stk_arg_off -= 8; + } +} + +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + int ret; + + mutex_lock(&text_mutex); + ret = sw64_insn_copy(dst, 
src, len); + mutex_unlock(&text_mutex); + + if (ret) + return ERR_PTR(-EINVAL); + + return dst; +} + +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + int ret; + void *image = kzalloc(len, GFP_KERNEL); + + mutex_lock(&text_mutex); + ret = sw64_insn_copy(dst, image, len); + mutex_unlock(&text_mutex); + + kfree(image); + return ret; +} + +static void sw64_invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int args_off, int retval_off, int run_ctx_off, bool save_ret) +{ + u32 *branch; + u64 enter_prog; + u64 exit_prog; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + enter_prog = (u64)bpf_trampoline_enter(p); + exit_prog = (u64)bpf_trampoline_exit(p); + + if (l->cookie == 0) { + /* if cookie is zero, one instruction is enough to store it */ + emit(SW64_BPF_STL(SW64_BPF_REG_ZR, + SW64_BPF_REG_FP, -run_ctx_off + cookie_off), ctx); + } else { + emit_sw64_ldu64(SW64_BPF_REG_T0, l->cookie, ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, + SW64_BPF_REG_FP, -run_ctx_off + cookie_off), ctx); + } + + /* arg1: prog */ + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)p, ctx); + /* arg2: &run_ctx */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A1, SW64_BPF_REG_FP, -run_ctx_off), ctx); + emit_sw64_call(enter_prog, ctx); + + /* save return value to callee saved register S0 , V0 is return value for sw64 */ + emit(SW64_BPF_LDI(SW64_BPF_REG_S0, SW64_BPF_REG_V0, 0), ctx); + + /* if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = ctx->image + ctx->idx; + /* nop reserved for conditional jump */ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + + /* must use BPF_REG_1(SW64_BPF_REG_A0), this is defined in clang */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A0, SW64_BPF_REG_FP, -args_off), ctx); + if (!p->jited) + emit_sw64_ldu64(SW64_BPF_REG_A1, (const u64)p->insnsi, ctx); + emit_sw64_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(SW64_BPF_STL(SW64_BPF_REG_V0, SW64_BPF_REG_FP, -retval_off), ctx); + + if (ctx->image) { + /* we must remember pc will add 1 when exec in sw */ + int offset = ctx->image + ctx->idx - branch - 1; + *branch = SW64_BPF_BEQ(SW64_BPF_REG_V0, offset); + } + + /* arg1: prog */ + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)p, ctx); + /* arg2: start time */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A1, SW64_BPF_REG_S0, 0), ctx); + /* arg3: &run_ctx */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A2, SW64_BPF_REG_FP, -run_ctx_off), ctx); + emit_sw64_call(exit_prog, ctx); +} + +static void sw64_invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, u32 **branches) +{ + int i; + + /* + * The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(SW64_BPF_STL(SW64_BPF_REG_ZR, SW64_BPF_REG_FP, -retval_off), ctx); + for (i = 0; i < tl->nr_links; i++) { + sw64_invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, + run_ctx_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(SW64_BPF_LDL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -retval_off), ctx); + /* + * Save the location of branch, and generate a nop. + * This nop will be replaced with a BNE later. 
+ */ + branches[i] = ctx->image + ctx->idx; + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + } +} + +static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_links *tlinks, void *func_addr, + int nregs, u32 flags) +{ + int i, offset; + u32 **branches = NULL; + int stack_size = 0; + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + void *orig_call = func_addr; + + /* Two types of generated trampoline stack layout: + * + * 1. trampoline called from function entry + * -------------------------------------- + * FP + 8 [ RA to parent func ] return address to parent + * function + * FP + 0 [ FP of parent func ] frame pointer of parent + * function + * FP - 8 [ R28 (BPF_AT) to traced func ] return address of traced + * function + * FP - 16 [ FP of traced func ] frame pointer of traced + * function + * FP - 24 [ GP of traced func ] global pointer of traced + * function + * -------------------------------------- + * + * 2. trampoline called directly + * -------------------------------------- + * FP - 8 [ RA to caller func ] return address to caller + * function + * FP - 16 [ FP of caller func ] frame pointer of caller + * function + * FP - 24 [ GP of caller func ] global pointer of caller + * function + * -------------------------------------- + * + * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * [ argN ] + * [ ... ] + * FP - args_off [ arg1 ] + * + * FP - nregs_off [ regs count ] + * + * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG + * + * FP - run_ctx_off [ bpf_tramp_run_ctx ] + * + * FP - sreg_off [ callee saved reg ] + * + * [ pads ] pads for 16 bytes alignment + * + * [ stack_argN ] + * [ ... ] + * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG + */ + + if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) + return -EOPNOTSUPP; + + /* room of trampoline frame to store return address, frame pointer and GP */ + stack_size += 24; + + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) { + stack_size += 8; /* Save (BPF R0) or SW A0, in sw64, they are the same */ + retval_off = stack_size; + } + + stack_size += nregs * 8; + args_off = stack_size; + + stack_size += 8; + nregs_off = stack_size; + + if (flags & BPF_TRAMP_F_IP_ARG) { + stack_size += 8; + ip_off = stack_size; + } + + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + run_ctx_off = stack_size; + + stack_size += 8; + sreg_off = stack_size; + + if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nregs - SW64_MAX_REG_ARGS > 0)) + stack_size += (nregs - SW64_MAX_REG_ARGS) * 8; + + stack_size = round_up(stack_size, STACK_ALIGN); + + /* room for args on stack must be at the top of stack */ + stk_arg_off = stack_size; + + if (func_addr) { + /* For the trampoline called from function entry, + * the frame of traced function and the frame of + * trampoline need to be considered. 
+ */ + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -16), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_FP, SW64_BPF_REG_SP, 16), ctx); + + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -stack_size), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_AT, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size), ctx); + } else { + /* For the trampoline called directly, just handle + * the frame of trampoline. + */ + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -stack_size), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size), ctx); + } + + /* + * callee saved register S0 to pass start time, + * we need to remember it in invoke_bpf_prog + */ + emit(SW64_BPF_STL(SW64_BPF_REG_S0, SW64_BPF_REG_FP, -sreg_off), ctx); + + /* store ip address of the traced function */ + if (flags & BPF_TRAMP_F_IP_ARG) { + emit_sw64_ldu64(SW64_BPF_REG_T0, (const u64)func_addr, ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -ip_off), ctx); + } + + emit(SW64_BPF_LDI(SW64_BPF_REG_T0, SW64_BPF_REG_ZR, nregs), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -nregs_off), ctx); + + save_args(ctx, args_off, nregs); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + orig_call += MCOUNT_INSN_SIZE; + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)im, ctx); + emit_sw64_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_links; i++) + sw64_invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off, + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL); + if (!branches) + return -ENOMEM; + + sw64_invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, min_t(int, nregs, SW64_MAX_REG_ARGS)); + restore_stack_args(nregs - SW64_MAX_REG_ARGS, args_off, stk_arg_off, ctx); + /* call original func */ + emit_sw64_call((const u64)orig_call, ctx); + /* store return value */ + emit(SW64_BPF_STL(SW64_BPF_REG_V0, SW64_BPF_REG_FP, -retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + /* reserved 16 nop for long jmp, that is enough */ + for (i = 0; i < 16; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, + SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + } + + for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + /* we must remember pc will add 1 when exec in sw */ + offset = ctx->image + ctx->idx - branches[i] - 1; + *branches[i] = SW64_BPF_BNE(SW64_BPF_REG_T0, offset); + } + + for (i = 0; i < fexit->nr_links; i++) + sw64_invoke_bpf_prog(ctx, fexit->links[i], args_off, + retval_off, run_ctx_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + /* for the first pass, assume the worst case */ + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)im, ctx); + emit_sw64_call((const u64)__bpf_tramp_exit, 
ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, min_t(int, nregs, SW64_MAX_REG_ARGS)); + + if (save_ret) + emit(SW64_BPF_LDL(SW64_BPF_REG_V0, SW64_BPF_REG_FP, -retval_off), ctx); + + /* callee saved register S0 to transmit start time, so use this reg, now we restore it */ + emit(SW64_BPF_LDL(SW64_BPF_REG_S0, SW64_BPF_REG_FP, -sreg_off), ctx); + + if (func_addr) { + /* trampoline called from function entry */ + emit(SW64_BPF_LDL(SW64_BPF_REG_AT, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, stack_size), ctx); + + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 16), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* return to parent function */ + emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); + else + /* return to traced function */ + emit(SW64_BPF_RET(SW64_BPF_REG_AT), ctx); + } else { + /* trampoline called directly */ + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, stack_size), ctx); + + emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); + } + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int ret; + int nregs; + struct jit_ctx ctx = { + .image = NULL, + .ro_image = image, + .idx = 0, + }; + + nregs = btf_func_model_nregs(m); + + ret = __arch_prepare_bpf_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); + if (ret < 0) + return ret; + + if (ret * SW64_INSN_SIZE > (long)image_end - (long)image) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + ret = __arch_prepare_bpf_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); + if (ret < 0) + goto out; + +out: + return ret < 0 ? ret : ret * SW64_INSN_SIZE; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; -- Gitee