From e0f58877ab5789a005ad10186a7f4a8e946da1e5 Mon Sep 17 00:00:00 2001 From: Shao Zhiyu Date: Thu, 9 Oct 2025 07:15:56 +0000 Subject: [PATCH 1/5] sw64: pci: remove redundant debug control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- The 'CONFIG_PCI_DEBUG' check in the sunway pci controller driver is not functional, as the kernel recognizes pci controllers as devices, so remove the old debug check and use 'dev_dbg()' instead. This commit also enables 'CONFIG_DYNAMIC_DEBUG' to control debugging in the sunway pci controller driver, and developers can now use the parameter dyndbg="file [FILE] +p" on the kernel command line for runtime debugging. Signed-off-by: Shao Zhiyu Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- drivers/pci/controller/pci-sunway.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/pci/controller/pci-sunway.c b/drivers/pci/controller/pci-sunway.c index d8e291decf65..e3487e2ea43c 100644 --- a/drivers/pci/controller/pci-sunway.c +++ b/drivers/pci/controller/pci-sunway.c @@ -562,11 +562,10 @@ static int pci_read_rc_cfg(struct pci_bus *bus, unsigned int devfn, struct pci_controller *hose = pci_bus_to_pci_controller(bus); void __iomem *cfg_iobase = hose->rc_config_space_base; - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t", - cfg_iobase + ((where & ~3) << 5), - bus->number, - devfn, where, size); + dev_dbg(&bus->dev, "rc read addr:%px bus %d, devfn %#x, where %#x size=%d\t", + cfg_iobase + ((where & ~3) << 5), + bus->number, + devfn, where, size); if ((uintptr_t)where & (size - 1)) { *val = 0; @@ -596,8 +595,7 @@ static int pci_read_rc_cfg(struct pci_bus *bus, unsigned int devfn, break; } - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("*val %#x\n ", *val); + dev_dbg(&bus->dev, "*val %#x\n ", *val); return PCIBIOS_SUCCESSFUL; } @@ -632,11 +630,10 @@ static int pci_write_rc_cfg(struct pci_bus *bus, unsigned int devfn, break; } - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("rc write addr:%px bus %d, devfn %#x, where %#x *val %#x size %d\n", - cfg_iobase + ((where & ~3) << 5), - bus->number, - devfn, where, val, size); + dev_dbg(&bus->dev, "rc write addr:%px bus %d, devfn %#x, where %#x *val %#x size %d\n", + cfg_iobase + ((where & ~3) << 5), + bus->number, + devfn, where, val, size); writel(data, cfg_iobase + ((where & ~3) << 5)); @@ -762,9 +759,8 @@ void __iomem *sunway_pci_map_bus(struct pci_bus *bus, cfg_iobase = hose->ep_config_space_base + relbus; - if (IS_ENABLED(CONFIG_PCI_DEBUG)) - pr_debug("addr:%px bus %d, devfn %d, where %d\n", - cfg_iobase, bus->number, devfn, where); + dev_dbg(&bus->dev, "addr:%px bus %d, devfn %d, where %d\n", + cfg_iobase, bus->number, devfn, where); return cfg_iobase; } EXPORT_SYMBOL(sunway_pci_map_bus); -- Gitee From 8af49cb59e51e5e9865cfc678671a08d0255a019 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 30 Oct 2025 17:09:24 +0800 Subject: [PATCH 2/5] sw64: kvm: add pv steal time support Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- Introduce a per-cpu structure pvclock_steal_time to store the steal time for each vcpu. When the VM exits with a KVM_EXIT_SET_PVTIME_ST exception, it passes the address of struct kvm_steal_time and informs the hypervisor to enable steal time.
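The shared structure is protected by a simple even/odd version protocol: the host bumps 'version' to an odd value before updating 'steal_time' and back to an even value afterwards. As an illustrative sketch only (the in-tree guest reader, para_steal_clock(), is added by a later patch in this series and uses virt_rmb()), a consumer of this layout is expected to read it roughly like this:

	/* Illustrative guest-side reader for the structure updated here. */
	static u64 read_steal_time(struct pvclock_vcpu_steal_time *st)
	{
		u32 version;
		u64 steal;

		do {
			version = READ_ONCE(st->version);
			smp_rmb();	/* read steal_time only after version */
			steal = READ_ONCE(st->steal_time);
			smp_rmb();	/* re-check version after steal_time */
		} while ((version & 1) || version != READ_ONCE(st->version));

		return steal;
	}
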
Additionally, KVM_REQ_RECORD_STEAL is added to handle updates of the steal time of vcpu. Signed-off-by: Chen Wang Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/include/asm/kvm_asm.h | 3 +- arch/sw_64/include/asm/kvm_host.h | 21 +++++++++ arch/sw_64/include/asm/pvtime.h | 16 +++++++ arch/sw_64/kvm/Makefile | 3 +- arch/sw_64/kvm/handle_exit.c | 4 ++ arch/sw_64/kvm/pvtime.c | 78 +++++++++++++++++++++++++++++++ arch/sw_64/kvm/sw64.c | 8 ++++ 7 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 arch/sw_64/include/asm/pvtime.h create mode 100644 arch/sw_64/kvm/pvtime.c diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h index a8e8ef3d68a0..f714f5ea12ae 100644 --- a/arch/sw_64/include/asm/kvm_asm.h +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -14,9 +14,9 @@ #define SW64_KVM_EXIT_RESTART 17 #define SW64_KVM_EXIT_APT_FAULT 18 #define SW64_KVM_EXIT_FATAL_ERROR 22 +#define SW64_KVM_EXIT_SET_PVTIME_ST 23 #define SW64_KVM_EXIT_DEBUG 24 - #define kvm_sw64_exception_type \ {0, "HOST_INTR" }, \ {1, "IO" }, \ @@ -28,6 +28,7 @@ {17, "RESTART" }, \ {18, "APT_FAULT" }, \ {22, "FATAL_ERROR" }, \ + {23, "SET_PVTIME_ST" }, \ {24, "DEBUG" } diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index 7dd763a7d4c2..33df07b9a665 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -73,6 +73,8 @@ #define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(0) + struct kvm_arch_memory_slot { }; @@ -121,6 +123,12 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; + + /* Guest steal-time state */ + struct { + gpa_t base; + u64 last_steal; + } steal; }; struct vmem_info { @@ -216,4 +224,17 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; } +void kvm_init_steal_time(struct kvm_vcpu *vcpu); +void kvm_sw64_record_steal_time(struct kvm_vcpu *vcpu); + +static inline void kvm_sw64_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->steal.base = INVALID_GPA; + vcpu_arch->steal.last_steal = 0; +} + +static inline bool kvm_sw64_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->steal.base != INVALID_GPA); +} #endif /* _ASM_SW64_KVM_HOST_H */ diff --git a/arch/sw_64/include/asm/pvtime.h b/arch/sw_64/include/asm/pvtime.h new file mode 100644 index 000000000000..14a52a0e493b --- /dev/null +++ b/arch/sw_64/include/asm/pvtime.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 SW64 Ltd. 
*/ + +#ifndef __ASM_PVTIME_ABI_H +#define __ASM_PVTIME_ABI_H + +/* The below structure is defined in SW64 */ + +struct pvclock_vcpu_steal_time { + __u64 steal_time; + __u32 version; + /* Structure must be 128 byte aligned, pad to that size */ + __u32 padding[29]; +} __packed; + +#endif diff --git a/arch/sw_64/kvm/Makefile b/arch/sw_64/kvm/Makefile index 530170e9167d..22f3194ad477 100644 --- a/arch/sw_64/kvm/Makefile +++ b/arch/sw_64/kvm/Makefile @@ -9,7 +9,8 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o -kvm-y += sw64.o emulate.o mmio.o kvm_timer.o handle_exit.o perf.o +kvm-y += sw64.o emulate.o mmio.o kvm_timer.o handle_exit.o perf.o \ + pvtime.o kvm-$(CONFIG_SUBARCH_C3B) += kvm_core3.o entry_core3.o kvm-$(CONFIG_SUBARCH_C4) += kvm_core4.o mmu.o entry_core4.o diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 8c6c7325ccfc..3a623c59895a 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -73,6 +73,10 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; vcpu->run->hw.hardware_exit_reason = hargs->arg0; return 0; + case SW64_KVM_EXIT_SET_PVTIME_ST: + vcpu->arch.steal.base = hargs->arg0; + kvm_init_steal_time(vcpu); + return 1; } return 1; diff --git a/arch/sw_64/kvm/pvtime.c b/arch/sw_64/kvm/pvtime.c new file mode 100644 index 000000000000..767617d4c5e0 --- /dev/null +++ b/arch/sw_64/kvm/pvtime.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 SW64 Ltd. + +#include +#include +#include + +void kvm_sw64_record_steal_time(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + u64 last_steal = vcpu->arch.steal.last_steal; + u64 __user *steal_ptr; + u32 __user *version_ptr; + u64 offset_s = offsetof(struct pvclock_vcpu_steal_time, steal_time); + u64 offset_v = offsetof(struct pvclock_vcpu_steal_time, version); + u64 steal, stealInc, hva; + u32 version; + gfn_t gfn; + int idx; + + if (base == INVALID_GPA) + return; + + idx = srcu_read_lock(&kvm->srcu); + gfn = base >> PAGE_SHIFT; + hva = kvm_vcpu_gfn_to_hva(vcpu, gfn); + if (WARN_ON(kvm_is_error_hva(hva))) { + vcpu->arch.steal.base = INVALID_GPA; + return; + } + + steal_ptr = (__u64 __user *)(hva + offset_in_page(base) + offset_s); + version_ptr = (__u32 __user *)(hva + offset_in_page(base) + offset_v); + + if (WARN_ON(get_user(version, version_ptr))) + return; + + version += 1; + + if (WARN_ON(put_user(version, version_ptr))) + return; + + if (!WARN_ON(get_user(steal, steal_ptr))) { + vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay); + stealInc += vcpu->arch.steal.last_steal - last_steal; + if (stealInc) { + steal += stealInc; + WARN_ON(put_user(steal, steal_ptr)); + } + } + + version += 1; + WARN_ON(put_user(version, version_ptr)); + + kvm_vcpu_mark_page_dirty(vcpu, gfn); + srcu_read_unlock(&kvm->srcu, idx); +} + +void kvm_init_steal_time(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_steal_time init_values = {}; + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + int idx; + + if (base == INVALID_GPA) + return; + + /* + * Start counting stolen time from the time the guest + * requests to set pvtime. 
+ */ + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + idx = srcu_read_lock(&kvm->srcu); + kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); + srcu_read_unlock(&kvm->srcu, idx); +} diff --git a/arch/sw_64/kvm/sw64.c b/arch/sw_64/kvm/sw64.c index 73c547fb674c..0af03153cb22 100644 --- a/arch/sw_64/kvm/sw64.c +++ b/arch/sw_64/kvm/sw64.c @@ -122,6 +122,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) tbivpn(0, 0, vpn); } + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_sw64_record_steal_time(vcpu); + if (kvm_dirty_ring_check_request(vcpu)) return 0; } @@ -234,6 +237,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.tsk = current; vcpu->arch.pcpu_id = -1; /* force flush tlb for the first time */ + kvm_sw64_pvtime_vcpu_init(&vcpu->arch); + return 0; } @@ -281,6 +286,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; + + if (kvm_sw64_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- Gitee From fde74986ec2ae11e3fefb73a83b56fac53468b74 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 15 Oct 2025 08:56:02 +0800 Subject: [PATCH 3/5] sw64: paravirt: add pv steal_time support in guest side Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- Add a new hcall HCALL_SET_PVTIME_ST to pass the GPA of guest steal time structure to hypervisor. Meanwhile, para_steal_clock() function is used to read the amount of time stolen from the current vcpu. Signed-off-by: Chen Wang Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/Kconfig | 11 +++++ arch/sw_64/include/asm/hcall.h | 1 + arch/sw_64/include/asm/paravirt.h | 16 ++++--- arch/sw_64/kernel/paravirt.c | 76 +++++++++++++++++++++++++++++++ arch/sw_64/kernel/time.c | 3 ++ 5 files changed, 101 insertions(+), 6 deletions(-) diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 75b3cf116dc8..583da0bc4a20 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -475,6 +475,17 @@ config PARAVIRT under a hypervisor, potentially improving performance significantly over full virtualization. +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + depends on PARAVIRT + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account + for that, there can be a small performance impact. + + If in doubt, say N here. 
+ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h index 40eab985fcbd..04a853dc3938 100644 --- a/arch/sw_64/include/asm/hcall.h +++ b/arch/sw_64/include/asm/hcall.h @@ -18,6 +18,7 @@ enum HCALL_TYPE { HCALL_SWNET = 20, /* guest request swnet service */ HCALL_SWNET_IRQ = 21, /* guest request swnet intr */ HCALL_FATAL_ERROR = 22, /* guest fatal error, issued by hmcode */ + HCALL_SET_PVTIME_ST = 23, /* guest pvtime set gpa */ NR_HCALL }; diff --git a/arch/sw_64/include/asm/paravirt.h b/arch/sw_64/include/asm/paravirt.h index 0ca6befc53ee..2b1ab0a4eb35 100644 --- a/arch/sw_64/include/asm/paravirt.h +++ b/arch/sw_64/include/asm/paravirt.h @@ -3,14 +3,12 @@ #define _ASM_SW64_PARAVIRT_H #ifdef CONFIG_PARAVIRT +#include + struct static_key; extern struct static_key paravirt_steal_enabled; extern struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops { - unsigned long long (*steal_clock)(int cpu); -}; - struct pv_lock_ops { void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); @@ -20,17 +18,22 @@ struct pv_lock_ops { }; struct paravirt_patch_template { - struct pv_time_ops time; struct pv_lock_ops lock; }; extern struct paravirt_patch_template pv_ops; +u64 dummy_steal_clock(int cpu); + +DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock); + static inline u64 paravirt_steal_clock(int cpu) { - return pv_ops.time.steal_clock(cpu); + return static_call(pv_steal_clock)(cpu); } +int __init pv_steal_time_init(void); + __visible bool __native_vcpu_is_preempted(int cpu); static inline bool pv_vcpu_is_preempted(int cpu) @@ -66,6 +69,7 @@ static inline void pv_queued_spin_unlock(struct qspinlock *lock) #else #define pv_qspinlock_init() do {} while (0) +#define pv_steal_time_init() do {} while (0) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/sw_64/kernel/paravirt.c b/arch/sw_64/kernel/paravirt.c index e22a718fc525..4b2e0f44f709 100644 --- a/arch/sw_64/kernel/paravirt.c +++ b/arch/sw_64/kernel/paravirt.c @@ -9,8 +9,11 @@ #include #include #include +#include +#include #include +#include #include struct static_key paravirt_steal_enabled; @@ -56,3 +59,76 @@ void __init pv_qspinlock_init(void) pv_ops.lock.kick = NULL; } #endif + +static u64 native_steal_clock(int cpu) +{ + return 0; +} + +DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); + +static DEFINE_PER_CPU(struct pvclock_vcpu_steal_time, pvclock_steal_time) __aligned(128); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +/* return steal time in ns */ +static u64 para_steal_clock(int cpu) +{ + struct pvclock_vcpu_steal_time *st = per_cpu_ptr(&pvclock_steal_time, cpu); + u64 steal; + int version; + + do { + version = READ_ONCE(st->version); + virt_rmb(); + steal = READ_ONCE(st->steal_time); + virt_rmb(); + } while ((version & 1) || + version != READ_ONCE(st->version)); + + return steal; +} + +static int steal_time_cpu_online(unsigned int cpu) +{ + struct pvclock_vcpu_steal_time *st = this_cpu_ptr(&pvclock_steal_time); + + hcall(HCALL_SET_PVTIME_ST, __pa(st), 0, 0); + + return 0; +} + +static int steal_time_cpu_down_prepare(unsigned int cpu) +{ + hcall(HCALL_SET_PVTIME_ST, INVALID_GPA, 0, 0); + return 0; +} + +int __init pv_steal_time_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "sw64/pvtime:online", + steal_time_cpu_online, + steal_time_cpu_down_prepare); + if (ret < 0) + return ret; + + 
static_call_update(pv_steal_clock, para_steal_clock); + + static_key_slow_inc(&paravirt_steal_enabled); + if (steal_acc) + static_key_slow_inc(&paravirt_steal_rq_enabled); + + pr_info("using paravirt steal time\n"); + + return 0; +} diff --git a/arch/sw_64/kernel/time.c b/arch/sw_64/kernel/time.c index c6cefd4383b5..50037c16210a 100644 --- a/arch/sw_64/kernel/time.c +++ b/arch/sw_64/kernel/time.c @@ -42,6 +42,9 @@ time_init(void) sw64_setup_timer(); /* Calibrate the delay loop directly */ lpj_fine = cycle_freq / HZ; + + if (is_in_guest()) + pv_steal_time_init(); } void clocksource_arch_init(struct clocksource *cs) -- Gitee From e67b665d03541eb5fc319136a3d074b7286d59eb Mon Sep 17 00:00:00 2001 From: Gu Zitao Date: Tue, 11 Nov 2025 16:58:00 +0800 Subject: [PATCH 4/5] sw64: fix random mmap base range Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- The original random mmap base range was incorrect: it should be 0~256M, but it was set to 0~1024G. So fix it. Signed-off-by: Gu Zitao Reviewed-by: He Sheng --- arch/sw_64/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sw_64/mm/mmap.c b/arch/sw_64/mm/mmap.c index d4bf9a7d2627..45b3207d1466 100644 --- a/arch/sw_64/mm/mmap.c +++ b/arch/sw_64/mm/mmap.c @@ -113,7 +113,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long arch_mmap_rnd(void) { - unsigned long rnd = get_random_long() & 0x7fffffful; + unsigned long rnd = get_random_long() & 0x7ffful; return rnd << PAGE_SHIFT; } -- Gitee From 7bf17c7823507f83bafa2980e3d53c7b905b577e Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Wed, 19 Nov 2025 08:29:09 +0800 Subject: [PATCH 5/5] sw64: ebpf: port arch_prepare_bpf_trampoline() Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDDHEJ -------------------------------- Port the implementation of the arch_prepare_bpf_trampoline() function for SW64, enabling support for eBPF fentry, fexit, and struct_ops features. We have tested these three features individually using the fentry, fexit, and dummy test cases from the test_progs program under the tools directory.
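For context, the kind of program this unlocks is the usual libbpf-style fentry/fexit skeleton; a minimal sketch follows (the do_unlinkat attach point is only an example target, not something this patch adds):

	// Minimal fentry program sketch, built against vmlinux.h with libbpf.
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("fentry/do_unlinkat")
	int BPF_PROG(handle_unlink_entry, int dfd, struct filename *name)
	{
		bpf_printk("do_unlinkat entered");
		return 0;
	}

Loading such a program causes the kernel to call arch_prepare_bpf_trampoline() to build the trampoline that bpf_arch_text_poke() then wires into the traced function.
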
Signed-off-by: Jinyu Tang Signed-off-by: Yizhou Chen Tested-by: Yizhou Chen Signed-off-by: Gu Yuchen Reviewed-by: He Sheng Signed-off-by: Gu Zitao --- arch/sw_64/net/bpf_jit_comp.c | 587 +++++++++++++++++++++++++++++++++- 1 file changed, 585 insertions(+), 2 deletions(-) diff --git a/arch/sw_64/net/bpf_jit_comp.c b/arch/sw_64/net/bpf_jit_comp.c index a4f273e7725d..057461689832 100644 --- a/arch/sw_64/net/bpf_jit_comp.c +++ b/arch/sw_64/net/bpf_jit_comp.c @@ -24,13 +24,18 @@ #include #include #include +#include #include +#include +#include #include "bpf_jit.h" #define TCALL_CNT (MAX_BPF_JIT_REG + 0) - +#define SW64_FENTRY_NINSNS 5 +#define SW64_MAX_REG_ARGS 6 +#define STACK_ALIGN 16 static const int bpf2sw64[] = { /* return value from in-kernel function, and exit value from eBPF */ [BPF_REG_0] = SW64_BPF_REG_V0, @@ -61,6 +66,7 @@ struct jit_ctx { int *insn_offset; // [bpf_insn_idx] = jited_insn_idx int exentry_idx; u32 *image; // JITed instruction + u32 *ro_image; u32 stack_size; }; @@ -486,10 +492,11 @@ static int offset_to_epilogue(const struct jit_ctx *ctx) } /* For tail call, jump to set up function call stack */ -#define PROLOGUE_OFFSET 11 +#define PROLOGUE_OFFSET (11 + SW64_FENTRY_NINSNS) static void build_prologue(struct jit_ctx *ctx, bool was_classic) { + int i; const u8 r6 = bpf2sw64[BPF_REG_6]; const u8 r7 = bpf2sw64[BPF_REG_7]; const u8 r8 = bpf2sw64[BPF_REG_8]; @@ -497,6 +504,10 @@ static void build_prologue(struct jit_ctx *ctx, bool was_classic) const u8 fp = bpf2sw64[BPF_REG_FP]; const u8 tcc = bpf2sw64[TCALL_CNT]; + /* nops reserved for fentry call */ + for (i = 0; i < SW64_FENTRY_NINSNS; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + /* Save callee-saved registers */ emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -64), ctx); emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 0), ctx); @@ -1370,6 +1381,578 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } +static int __patch_insn_write(void *addr, const void *insn, size_t len) +{ + return copy_to_kernel_nofault(addr, insn, len); +} + +int patch_insn_write(void *addr, const void *insn, size_t len) +{ + size_t size; + int ret; + + while (len) { + size = min(len, PAGE_SIZE - offset_in_page(addr)); + + ret = __patch_insn_write(addr, insn, size); + if (ret) + return ret; + + addr += size; + insn += size; + len -= size; + } + + return 0; +} + +int sw64_insn_copy(void *addr, const void *insns, size_t len) +{ + int ret; + + ret = patch_insn_write(addr, insns, len); + if (!ret) { + flush_icache_range((unsigned long)addr, (unsigned long)addr + len); + mb(); + } + + return ret; +} + +static int gen_call_or_nops(void *target, void *ip, u32 *insns, bool is_call) +{ + int i; + s64 offset; + s32 jmp_offset; + struct jit_ctx ctx = { + .image = insns, + .idx = 0, + }; + + if (!target) { + for (i = 0; i < SW64_FENTRY_NINSNS; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, + SW64_BPF_REG_ZR), &ctx); + return 0; + } + offset = (s64)((unsigned long)target - (unsigned long)ip); + if (offset >= -0x100000 && offset <= 0xfffff) { + jmp_offset = (s32)offset; + /* we must remember br in sw is 4 * disp, and -1 is for pc will add 1 when exec */ + jmp_offset = jmp_offset/4 - 1; + emit(SW64_BPF_BR(is_call ? 
SW64_BPF_REG_AT : SW64_BPF_REG_ZR, jmp_offset), &ctx); + } else { + pr_err("bpf-jit: target offset 0x%llx is out of range\n", offset); + return -ERANGE; + } + return 0; +} + +static void set_sw_nops(u32 *insns, int num) +{ + int i; + struct jit_ctx ctx = { + .image = insns, + .idx = 0, + }; + + for (i = 0; i < num; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), &ctx); + + return; + +} + +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + + u32 old_insns[SW64_FENTRY_NINSNS], new_insns[SW64_FENTRY_NINSNS]; + bool is_call = poke_type == BPF_MOD_CALL; + int ret; + + if (!is_kernel_text((unsigned long)ip) && + !is_bpf_text_address((unsigned long)ip)) + return -EOPNOTSUPP; + + set_sw_nops(old_insns, SW64_FENTRY_NINSNS); + set_sw_nops(new_insns, SW64_FENTRY_NINSNS); + + ret = gen_call_or_nops(old_addr, ip, old_insns, is_call); + if (ret) + return ret; + /* if not same, old addr is wrong, maybe change illegal */ + if (memcmp(ip, old_insns, SW64_FENTRY_NINSNS * 4)) + return -EFAULT; + + ret = gen_call_or_nops(new_addr, ip, new_insns, is_call); + if (ret) + return ret; + + cpus_read_lock(); + mutex_lock(&text_mutex); + if (memcmp(ip, new_insns, SW64_FENTRY_NINSNS * 4)) + ret = sw64_insn_copy(ip, new_insns, SW64_FENTRY_NINSNS * 4); + mutex_unlock(&text_mutex); + cpus_read_unlock(); + + return ret; +} + +static int btf_func_model_nregs(const struct btf_func_model *m) +{ + int nregs = m->nr_args; + int i; + + /* extra registers needed for struct argument */ + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + /* The arg_size is at most 16 bytes, enforced by the verifier. */ + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + nregs += (m->arg_size[i] + 7) / 8 - 1; + } + + return nregs; +} + +static void emit_sw64_call(u64 target, struct jit_ctx *ctx) +{ + unsigned long ip = (unsigned long)(ctx->ro_image + ctx->idx); + s64 offset = (s64)((unsigned long)target - (unsigned long)ip); + + if (offset >= -0x100000 && offset <= 0xfffff) { + s32 jmp_offset = (s32)offset; + /* we must remember br in sw is 4 * disp, and -1 is for pc will add 1 when exec */ + jmp_offset = jmp_offset/4 - 1; + emit(SW64_BPF_BR(SW64_BPF_REG_RA, jmp_offset), ctx); + } else { + emit_sw64_load_call_addr(SW64_BPF_REG_PV, target, ctx); + emit(SW64_BPF_CALL(SW64_BPF_REG_RA, SW64_BPF_REG_PV), ctx); + } + +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nregs) +{ + int i; + + for (i = 0; i < nregs; i++) { + if (i < SW64_MAX_REG_ARGS) { + emit(SW64_BPF_STL(i + SW64_BPF_REG_A0, SW64_BPF_REG_FP, -args_off), ctx); + } else { + emit(SW64_BPF_LDL(SW64_BPF_REG_T0, + SW64_BPF_REG_FP, 16 + (i - SW64_MAX_REG_ARGS) * 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -args_off), ctx); + } + args_off -= 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nr_reg_args) +{ + int i; + + for (i = 0; i < nr_reg_args; i++) { + emit(SW64_BPF_LDL(i + SW64_BPF_REG_A0, SW64_BPF_REG_FP, -args_off), ctx); + args_off -= 8; + } +} + +static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off, + struct jit_ctx *ctx) +{ + int i; + + for (i = 0; i < nr_stack_args; i++) { + emit(SW64_BPF_LDL(SW64_BPF_REG_T0, + SW64_BPF_REG_FP, -(args_off - SW64_MAX_REG_ARGS * 8)), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -stk_arg_off), ctx); + args_off -= 8; + stk_arg_off -= 8; + } +} + +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + int ret; + + mutex_lock(&text_mutex); + ret = sw64_insn_copy(dst, 
src, len); + mutex_unlock(&text_mutex); + + if (ret) + return ERR_PTR(-EINVAL); + + return dst; +} + +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + int ret; + void *image = kzalloc(len, GFP_KERNEL); + + mutex_lock(&text_mutex); + ret = sw64_insn_copy(dst, image, len); + mutex_unlock(&text_mutex); + + kfree(image); + return ret; +} + +static void sw64_invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int args_off, int retval_off, int run_ctx_off, bool save_ret) +{ + u32 *branch; + u64 enter_prog; + u64 exit_prog; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + enter_prog = (u64)bpf_trampoline_enter(p); + exit_prog = (u64)bpf_trampoline_exit(p); + + if (l->cookie == 0) { + /* if cookie is zero, one instruction is enough to store it */ + emit(SW64_BPF_STL(SW64_BPF_REG_ZR, + SW64_BPF_REG_FP, -run_ctx_off + cookie_off), ctx); + } else { + emit_sw64_ldu64(SW64_BPF_REG_T0, l->cookie, ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, + SW64_BPF_REG_FP, -run_ctx_off + cookie_off), ctx); + } + + /* arg1: prog */ + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)p, ctx); + /* arg2: &run_ctx */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A1, SW64_BPF_REG_FP, -run_ctx_off), ctx); + emit_sw64_call(enter_prog, ctx); + + /* save return value to callee saved register S0 , V0 is return value for sw64 */ + emit(SW64_BPF_LDI(SW64_BPF_REG_S0, SW64_BPF_REG_V0, 0), ctx); + + /* if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = ctx->image + ctx->idx; + /* nop reserved for conditional jump */ + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + + /* must use BPF_REG_1(SW64_BPF_REG_A0), this is defined in clang */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A0, SW64_BPF_REG_FP, -args_off), ctx); + if (!p->jited) + emit_sw64_ldu64(SW64_BPF_REG_A1, (const u64)p->insnsi, ctx); + emit_sw64_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(SW64_BPF_STL(SW64_BPF_REG_V0, SW64_BPF_REG_FP, -retval_off), ctx); + + if (ctx->image) { + /* we must remember pc will add 1 when exec in sw */ + int offset = ctx->image + ctx->idx - branch - 1; + *branch = SW64_BPF_BEQ(SW64_BPF_REG_V0, offset); + } + + /* arg1: prog */ + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)p, ctx); + /* arg2: start time */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A1, SW64_BPF_REG_S0, 0), ctx); + /* arg3: &run_ctx */ + emit(SW64_BPF_LDI(SW64_BPF_REG_A2, SW64_BPF_REG_FP, -run_ctx_off), ctx); + emit_sw64_call(exit_prog, ctx); +} + +static void sw64_invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, u32 **branches) +{ + int i; + + /* + * The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(SW64_BPF_STL(SW64_BPF_REG_ZR, SW64_BPF_REG_FP, -retval_off), ctx); + for (i = 0; i < tl->nr_links; i++) { + sw64_invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, + run_ctx_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(SW64_BPF_LDL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -retval_off), ctx); + /* + * Save the location of branch, and generate a nop. + * This nop will be replaced with a BNE later. 
+ */ + branches[i] = ctx->image + ctx->idx; + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + } +} + +static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_links *tlinks, void *func_addr, + int nregs, u32 flags) +{ + int i, offset; + u32 **branches = NULL; + int stack_size = 0; + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + void *orig_call = func_addr; + + /* Two types of generated trampoline stack layout: + * + * 1. trampoline called from function entry + * -------------------------------------- + * FP + 8 [ RA to parent func ] return address to parent + * function + * FP + 0 [ FP of parent func ] frame pointer of parent + * function + * FP - 8 [ R28 (BPF_AT) to traced func ] return address of traced + * function + * FP - 16 [ FP of traced func ] frame pointer of traced + * function + * FP - 24 [ GP of traced func ] global pointer of traced + * function + * -------------------------------------- + * + * 2. trampoline called directly + * -------------------------------------- + * FP - 8 [ RA to caller func ] return address to caller + * function + * FP - 16 [ FP of caller func ] frame pointer of caller + * function + * FP - 24 [ GP of caller func ] global pointer of caller + * function + * -------------------------------------- + * + * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * [ argN ] + * [ ... ] + * FP - args_off [ arg1 ] + * + * FP - nregs_off [ regs count ] + * + * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG + * + * FP - run_ctx_off [ bpf_tramp_run_ctx ] + * + * FP - sreg_off [ callee saved reg ] + * + * [ pads ] pads for 16 bytes alignment + * + * [ stack_argN ] + * [ ... ] + * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG + */ + + if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) + return -EOPNOTSUPP; + + /* room of trampoline frame to store return address, frame pointer and GP */ + stack_size += 24; + + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) { + stack_size += 8; /* Save (BPF R0) or SW A0, in sw64, they are the same */ + retval_off = stack_size; + } + + stack_size += nregs * 8; + args_off = stack_size; + + stack_size += 8; + nregs_off = stack_size; + + if (flags & BPF_TRAMP_F_IP_ARG) { + stack_size += 8; + ip_off = stack_size; + } + + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + run_ctx_off = stack_size; + + stack_size += 8; + sreg_off = stack_size; + + if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nregs - SW64_MAX_REG_ARGS > 0)) + stack_size += (nregs - SW64_MAX_REG_ARGS) * 8; + + stack_size = round_up(stack_size, STACK_ALIGN); + + /* room for args on stack must be at the top of stack */ + stk_arg_off = stack_size; + + if (func_addr) { + /* For the trampoline called from function entry, + * the frame of traced function and the frame of + * trampoline need to be considered. 
+ */ + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -16), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_FP, SW64_BPF_REG_SP, 16), ctx); + + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -stack_size), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_AT, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size), ctx); + } else { + /* For the trampoline called directly, just handle + * the frame of trampoline. + */ + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, -stack_size), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size), ctx); + } + + /* + * callee saved register S0 to pass start time, + * we need to remember it in invoke_bpf_prog + */ + emit(SW64_BPF_STL(SW64_BPF_REG_S0, SW64_BPF_REG_FP, -sreg_off), ctx); + + /* store ip address of the traced function */ + if (flags & BPF_TRAMP_F_IP_ARG) { + emit_sw64_ldu64(SW64_BPF_REG_T0, (const u64)func_addr, ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -ip_off), ctx); + } + + emit(SW64_BPF_LDI(SW64_BPF_REG_T0, SW64_BPF_REG_ZR, nregs), ctx); + emit(SW64_BPF_STL(SW64_BPF_REG_T0, SW64_BPF_REG_FP, -nregs_off), ctx); + + save_args(ctx, args_off, nregs); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + orig_call += MCOUNT_INSN_SIZE; + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)im, ctx); + emit_sw64_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_links; i++) + sw64_invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off, + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL); + if (!branches) + return -ENOMEM; + + sw64_invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, min_t(int, nregs, SW64_MAX_REG_ARGS)); + restore_stack_args(nregs - SW64_MAX_REG_ARGS, args_off, stk_arg_off, ctx); + /* call original func */ + emit_sw64_call((const u64)orig_call, ctx); + /* store return value */ + emit(SW64_BPF_STL(SW64_BPF_REG_V0, SW64_BPF_REG_FP, -retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + /* reserved 16 nop for long jmp, that is enough */ + for (i = 0; i < 16; i++) + emit(SW64_BPF_BIS_REG(SW64_BPF_REG_ZR, + SW64_BPF_REG_ZR, SW64_BPF_REG_ZR), ctx); + } + + for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + /* we must remember pc will add 1 when exec in sw */ + offset = ctx->image + ctx->idx - branches[i] - 1; + *branches[i] = SW64_BPF_BNE(SW64_BPF_REG_T0, offset); + } + + for (i = 0; i < fexit->nr_links; i++) + sw64_invoke_bpf_prog(ctx, fexit->links[i], args_off, + retval_off, run_ctx_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + /* for the first pass, assume the worst case */ + emit_sw64_ldu64(SW64_BPF_REG_A0, (const u64)im, ctx); + emit_sw64_call((const u64)__bpf_tramp_exit, 
ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, min_t(int, nregs, SW64_MAX_REG_ARGS)); + + if (save_ret) + emit(SW64_BPF_LDL(SW64_BPF_REG_V0, SW64_BPF_REG_FP, -retval_off), ctx); + + /* callee saved register S0 to transmit start time, so use this reg, now we restore it */ + emit(SW64_BPF_LDL(SW64_BPF_REG_S0, SW64_BPF_REG_FP, -sreg_off), ctx); + + if (func_addr) { + /* trampoline called from function entry */ + emit(SW64_BPF_LDL(SW64_BPF_REG_AT, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, stack_size), ctx); + + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, 8), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, 0), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, 16), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* return to parent function */ + emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); + else + /* return to traced function */ + emit(SW64_BPF_RET(SW64_BPF_REG_AT), ctx); + } else { + /* trampoline called directly */ + emit(SW64_BPF_LDL(SW64_BPF_REG_RA, SW64_BPF_REG_SP, stack_size - 8), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_FP, SW64_BPF_REG_SP, stack_size - 16), ctx); + emit(SW64_BPF_LDL(SW64_BPF_REG_GP, SW64_BPF_REG_SP, stack_size - 24), ctx); + emit(SW64_BPF_LDI(SW64_BPF_REG_SP, SW64_BPF_REG_SP, stack_size), ctx); + + emit(SW64_BPF_RET(SW64_BPF_REG_RA), ctx); + } + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int ret; + int nregs; + struct jit_ctx ctx = { + .image = NULL, + .ro_image = image, + .idx = 0, + }; + + nregs = btf_func_model_nregs(m); + + ret = __arch_prepare_bpf_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); + if (ret < 0) + return ret; + + if (ret * SW64_INSN_SIZE > (long)image_end - (long)image) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + ret = __arch_prepare_bpf_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); + if (ret < 0) + goto out; + +out: + return ret < 0 ? ret : ret * SW64_INSN_SIZE; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; -- Gitee