From ba3f07844a9a695a890c230104fb91811d8a0776 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Fri, 1 Aug 2025 13:03:41 +0800
Subject: [PATCH 1/8] irqchip/gic-v3: Refactor ISB + EOIR at ack time

[Upstream commit 6efb50923771f392122f5ce69dfc43b08f16e449]

There are cases where a context synchronization event is necessary
between an IRQ being raised and being handled, and there are races such
that we cannot rely upon the exception entry being subsequent to the
interrupt being raised. To fix this, we place an ISB between a read of
IAR and the subsequent invocation of an IRQ handler.

When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking
its handler, and we have a write to EOIR for this. As this write to EOIR
requires an ISB, and this is provided by the gic_write_eoir() helper, we
omit the usual ISB in this case, with the logic being:

|	if (static_branch_likely(&supports_deactivate_key))
|		gic_write_eoir(irqnr);
|	else
|		isb();

This is somewhat opaque, and it would be a little clearer if there were
an unconditional ISB, with only the write to EOIR being conditional,
e.g.

|	if (static_branch_likely(&supports_deactivate_key))
|		write_gicreg(irqnr, ICC_EOIR1_EL1);
|
|	isb();

This patch rewrites the code that way, with this logic factored into a
new helper function with comments explaining what the ISB is for, as
were originally laid out in commit:

  39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq")

Note that since then, we removed the IAR polling in commit:

  342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop")

... which removed one of the two race conditions.

For consistency, other portions of the driver are made to manipulate
EOIR using write_gicreg() and explicit ISBs, and the gic_write_eoir()
helper function is removed.

There should be no functional change as a result of this patch.
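For reference, the ack-completion logic this patch factors out ends up
as the following helper (condensed from the diff below; the explanatory
comment block is elided here):

|	static inline void gic_complete_ack(u32 irqnr)
|	{
|		/* EOI mode 1: priority drop before the handler is invoked */
|		if (static_branch_likely(&supports_deactivate_key))
|			write_gicreg(irqnr, ICC_EOIR1_EL1);
|
|		/* Order the IAR read (and any EOIR write) before the handler */
|		isb();
|	}

Both gic_handle_nmi() and gic_handle_irq() then call
gic_complete_ack(irqnr) in place of the open-coded conditional shown
above.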
Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com Signed-off-by: Shi Yang --- arch/arm/include/asm/arch_gicv3.h | 7 +---- drivers/irqchip/irq-gic-v3.c | 43 ++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 21f2ec96cc96..340352c77252 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \ return read_sysreg(a32); \ } \ +CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1) CPUIF_MAP(ICC_PMR, ICC_PMR_EL1) CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1) CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1) @@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1) /* Low-level accessors */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg(irq, ICC_EOIR1); - isb(); -} - static inline void gic_write_dir(u32 val) { write_sysreg(val, ICC_DIR); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 39a854ec93ca..75447d091a23 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -543,7 +543,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d) static void gic_eoi_irq(struct irq_data *d) { - gic_write_eoir(gic_irq(d)); + write_gicreg(gic_irq(d), ICC_EOIR1_EL1); + isb(); } static void gic_eoimode1_eoi_irq(struct irq_data *d) @@ -627,10 +628,38 @@ static void gic_deactivate_unhandled(u32 irqnr) if (irqnr < 8192) gic_write_dir(irqnr); } else { - gic_write_eoir(irqnr); + write_gicreg(irqnr, ICC_EOIR1_EL1); + isb(); } } +/* + * Follow a read of the IAR with any HW maintenance that needs to happen prior + * to invoking the relevant IRQ handler. We must do two things: + * + * (1) Ensure instruction ordering between a read of IAR and subsequent + * instructions in the IRQ handler using an ISB. + * + * It is possible for the IAR to report an IRQ which was signalled *after* + * the CPU took an IRQ exception as multiple interrupts can race to be + * recognized by the GIC, earlier interrupts could be withdrawn, and/or + * later interrupts could be prioritized by the GIC. + * + * For devices which are tightly coupled to the CPU, such as PMUs, a + * context synchronization event is necessary to ensure that system + * register state is not stale, as these may have been indirectly written + * *after* exception entry. + * + * (2) Deactivate the interrupt when EOI mode 1 is in use. 
+ */ +static inline void gic_complete_ack(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) + write_gicreg(irqnr, ICC_EOIR1_EL1); + + isb(); +} + static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) { bool irqs_enabled = interrupts_enabled(regs); @@ -639,10 +668,7 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) if (irqs_enabled) nmi_enter(); - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + gic_complete_ack(irqnr); /* * Leave the PSR.I bit set to prevent other NMIs to be @@ -713,10 +739,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - if (static_branch_likely(&supports_deactivate_key)) - gic_write_eoir(irqnr); - else - isb(); + gic_complete_ack(irqnr); if (handle_domain_irq(gic_data.domain, irqnr, regs)) { WARN_ONCE(true, "Unexpected interrupt received!\n"); -- Gitee From 58ac51865e0a6f529a85a43ab1052e2cabde8922 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 1 Aug 2025 13:03:42 +0800 Subject: [PATCH 2/8] irqchip/gic-v3: Fix priority mask handling [Upstream commit 614ab80c96474682157cabb14f8c8602b3422e90] When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent usage of local_irq_enable() and resulting in softirqs being run with IRQs erroneously masked (possibly resulting in deadlocks). This can happen when an IRQ exception is taken from a context where regular IRQs were unmasked, and either: (1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ being withdrawn since the IRQ exception was taken). (2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged. When an NMI is taken from a context where regular IRQs were masked, there is no problem. When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with perf, e.g. 
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
|  arch_local_irq_enable+0x4c/0x6c
|  __irq_exit_rcu+0x180/0x1ac
|  irq_exit_rcu+0x1c/0x44
|  el1_interrupt+0x4c/0xe4
|  el1h_64_irq_handler+0x18/0x24
|  el1h_64_irq+0x74/0x78
|  smpboot_thread_fn+0x68/0x2c0
|  kthread+0x124/0x130
|  ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---

The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:

* When the interrupted context had regular IRQs masked (and hence the
  interrupt must be an NMI), the entry code performs the NMI entry/exit
  and gic_handle_irq() should return with DAIF and ICC_PMR_EL1
  unchanged. This is handled correctly today.

* When the interrupted context had regular IRQs unmasked, the entry
  code performs IRQ entry/exit, but expects gic_handle_irq() to always
  update ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs)
  prior to returning (which it must do prior to invoking any regular
  IRQ handler).

This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).

Unfortunately, this is not handled consistently:

- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
  immediately without manipulating ICC_PMR_EL1 and DAIF.

- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
  gic_handle_nmi() to invoke the NMI handler, then returns without
  manipulating ICC_PMR_EL1 and DAIF.

- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
  prior to invoking the IRQ handler.

There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI.
That case specifically was fixed by commit: a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups") ... but unfortunately that commit added an inverse problem, where if an exception was taken from a context with regular IRQs *unmasked* and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously fail to unmask NMIs (and consequently regular IRQs could not be unmasked during softirq processing). Before and after that commit, if an NMI was taken from a context with regular IRQs unmasked gic_handle_irq() would not unmask NMIs prior to returning, leading to the same problem with softirq handling. This patch fixes this by restructuring gic_handle_irq(), splitting it into separate irqson/irqsoff helper functions which consistently perform the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context, regardless of the event indicated by ICC_IAR1_EL1. The special INTID handling is moved into the low-level IRQ/NMI handler invocation helper functions, so that early returns don't prevent the required manipulation of DAIF + ICC_PMR_EL1. Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Thomas Gleixner Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com Signed-off-by: Shi Yang --- drivers/irqchip/irq-gic-v3.c | 147 +++++++++++++++++++++-------------- 1 file changed, 88 insertions(+), 59 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 75447d091a23..b71cfb7a0421 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -660,78 +660,67 @@ static inline void gic_complete_ack(u32 irqnr) isb(); } -static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +static bool gic_rpr_is_nmi_prio(void) { - bool irqs_enabled = interrupts_enabled(regs); - int err; - - if (irqs_enabled) - nmi_enter(); - - gic_complete_ack(irqnr); + if (!gic_supports_nmi()) + return false; - /* - * Leave the PSR.I bit set to prevent other NMIs to be - * received while handling this one. - * PSR.I will be restored when we ERET to the - * interrupted context. - */ - err = handle_domain_nmi(gic_data.domain, irqnr, regs); - if (err) - gic_deactivate_unhandled(irqnr); + return unlikely(gic_read_rpr() == GICD_INT_NMI_PRI); +} - if (irqs_enabled) - nmi_exit(); +static bool gic_irqnr_is_special(u32 irqnr) +{ + return irqnr >= 1020 && irqnr <= 1023; } -static u32 do_read_iar(struct pt_regs *regs) +static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs) { - u32 iar; + if (gic_irqnr_is_special(irqnr)) + return; - if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) { - u64 pmr; + gic_complete_ack(irqnr); - /* - * We were in a context with IRQs disabled. However, the - * entry code has set PMR to a value that allows any - * interrupt to be acknowledged, and not just NMIs. This can - * lead to surprising effects if the NMI has been retired in - * the meantime, and that there is an IRQ pending. The IRQ - * would then be taken in NMI context, something that nobody - * wants to debug twice. - * - * Until we sort this, drop PMR again to a level that will - * actually only allow NMIs before reading IAR, and then - * restore it to what it was. 
- */ - pmr = gic_read_pmr(); - gic_pmr_mask_irqs(); - isb(); + if (handle_domain_irq(gic_data.domain, irqnr, regs)) { + WARN_ONCE(true, "Unexpected interrupt received!\n"); + gic_deactivate_unhandled(irqnr); + } +} - iar = gic_read_iar(); +static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +{ + if (gic_irqnr_is_special(irqnr)) + return; - gic_write_pmr(pmr); - } else { - iar = gic_read_iar(); - } + gic_complete_ack(irqnr); - return iar; + if (handle_domain_nmi(gic_data.domain, irqnr, regs)) + gic_deactivate_unhandled(irqnr); } -static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +/* + * An exception has been taken from a context with IRQs enabled, and this could + * be an IRQ or an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear + * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning, + * after handling any NMI but before handling any IRQ. + * + * The entry code has performed IRQ entry, and if an NMI is detected we must + * perform NMI entry/exit around invoking the handler. + */ +static void __gic_handle_irq_from_irqson(struct pt_regs *regs) { + bool is_nmi; u32 irqnr; - irqnr = do_read_iar(regs); + irqnr = gic_read_iar(); - /* Check for special IDs first */ - if ((irqnr >= 1020 && irqnr <= 1023)) - return; + is_nmi = gic_rpr_is_nmi_prio(); - if (gic_supports_nmi() && - unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) { - gic_handle_nmi(irqnr, regs); - return; + if (is_nmi) { + nmi_enter(); + __gic_handle_nmi(irqnr, regs); + nmi_exit(); } if (gic_prio_masking_enabled()) { @@ -739,12 +728,52 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs gic_arch_enable_irqs(); } - gic_complete_ack(irqnr); + if (!is_nmi) + __gic_handle_irq(irqnr, regs); +} - if (handle_domain_irq(gic_data.domain, irqnr, regs)) { - WARN_ONCE(true, "Unexpected interrupt received!\n"); - gic_deactivate_unhandled(irqnr); - } +/* + * An exception has been taken from a context with IRQs disabled, which can only + * be an NMI. + * + * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave + * DAIF.IF (and ICC_PMR_EL1) unchanged. + * + * The entry code has performed NMI entry. + */ +static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) +{ + u64 pmr; + u32 irqnr; + + /* + * We were in a context with IRQs disabled. However, the + * entry code has set PMR to a value that allows any + * interrupt to be acknowledged, and not just NMIs. This can + * lead to surprising effects if the NMI has been retired in + * the meantime, and that there is an IRQ pending. The IRQ + * would then be taken in NMI context, something that nobody + * wants to debug twice. + * + * Until we sort this, drop PMR again to a level that will + * actually only allow NMIs before reading IAR, and then + * restore it to what it was. 
+ */ + pmr = gic_read_pmr(); + gic_pmr_mask_irqs(); + isb(); + irqnr = gic_read_iar(); + gic_write_pmr(pmr); + + __gic_handle_nmi(irqnr, regs); +} + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) + __gic_handle_irq_from_irqsoff(regs); + else + __gic_handle_irq_from_irqson(regs); } #ifdef CONFIG_FAST_IRQ -- Gitee From cfe06d1a1c5f54b493e961de8826c18c60d3599b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Aug 2025 13:03:43 +0800 Subject: [PATCH 3/8] arm64/booting: Document boot requirements for FEAT_NMI In order to use FEAT_NMI we must be able to use ALLINT, require that it behave as though not trapped when it is present. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- Documentation/arm64/booting.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 7552dbc1cc54..92cf9645c3d1 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -270,6 +270,12 @@ Before jumping into the kernel, the following conditions must be met: having 0b1 set for the corresponding bit for each of the auxiliary counters present. + For CPUs with Non-maskable Interrupts (FEAT_NMI): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.TALLINT must be initialised to 0b0. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. -- Gitee From 1d4ddb41a73b6bf11d14dfe4a3d6c3f3a124aac4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Aug 2025 13:03:44 +0800 Subject: [PATCH 4/8] arm64/sysreg: Add definitions for immediate versions of MSR ALLINT Encodings are provided for ALLINT which allow setting of ALLINT.ALLINT using an immediate rather than requiring that a register be loaded with the value to write. Since these don't currently fit within the scheme we have for sysreg generation add manual encodings like we currently do for other similar registers such as SVCR. Since it is required that these immediate versions be encoded with xzr as the source register provide asm wrapper which ensure this is the case. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/daifflags.h | 9 +++++++++ arch/arm64/include/asm/sysreg.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index cfdde3a56805..281b83efd2a2 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -17,6 +17,15 @@ #define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) +static __always_inline void _allint_clear(void) +{ + asm volatile(__msr_s(SYS_ALLINT_CLR, "xzr")); +} + +static __always_inline void _allint_set(void) +{ + asm volatile(__msr_s(SYS_ALLINT_SET, "xzr")); +} /* mask/save/unmask/restore all exceptions, including interrupts. 
*/ static inline void local_daif_mask(void) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9705f7abd428..38df245c212a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -133,6 +133,8 @@ #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) +#define SYS_ALLINT_CLR sys_reg(0, 1, 4, 0, 0) +#define SYS_ALLINT_SET sys_reg(0, 1, 4, 1, 0) #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) -- Gitee From b83f2b1d4f2a49633cf653d56d801d256c858301 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Aug 2025 13:03:45 +0800 Subject: [PATCH 5/8] arm64/asm: Introduce assembly macros for managing ALLINT In order to allow assembly code to ensure that not even superpriorty interrupts can preempt it provide macros for enabling and disabling ALLINT.ALLINT. This is not integrated into the existing DAIF macros since we do not always wish to manage ALLINT along with DAIF and the use of DAIF in the naming of the existing macros might lead to surprises if ALLINT is also managed. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/assembler.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 63db8e5ec9f8..cc71a8a7bf9d 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -32,6 +32,22 @@ wx\n .req w\n .endr + .macro disable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_SET, xzr +alternative_else_nop_endif +#endif + .endm + + .macro enable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_CLR, xzr +alternative_else_nop_endif +#endif + .endm + .macro save_and_disable_daif, flags mrs \flags, daif msr daifset, #0xf -- Gitee From a6c828fa85ab80a608881470701f56eb8a18ec53 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Aug 2025 13:03:46 +0800 Subject: [PATCH 6/8] arm64/hyp-stub: Enable access to ALLINT In order to use NMIs we need to ensure that traps are disabled for it so update HCRX_EL2 to ensure that TALLINT is not set when we detect support for NMIs. 
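As a rough illustration (a hypothetical caller, not part of this patch:
system_uses_nmi() is only added by a later patch in this series and the
_allint_*() wrappers by an earlier one), the reason this trap matters is
that EL1 C code ends up writing ALLINT directly, and HCRX_EL2.TALLINT
controls whether such writes trap to EL2:

|	/* hypothetical EL1 caller, sketching why TALLINT must be clear */
|	if (system_uses_nmi()) {	/* ARM64_USES_NMI capability */
|		_allint_set();		/* mask architected NMIs (MSR ALLINT, #1) */
|		/* ... critical section ... */
|		_allint_clear();	/* unmask architected NMIs (MSR ALLINT, #0) */
|	}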
Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kernel/head.S | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 38df245c212a..61ffa2557cb4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -363,6 +363,8 @@ #define SYS_PAR_EL1_F BIT(0) #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define HCRX_EL2_TALLINT_MASK GENMASK(6, 6) + /*** Statistical Profiling Extension ***/ #define SMPRI_EL1_PRIORITY_MASK 0xf @@ -1343,6 +1345,7 @@ #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_NMI_SHIFT 36 #define ID_AA64PFR1_SME_SHIFT 24 #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 #define ID_AA64PFR1_RASFRAC_SHIFT 12 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4704d00d3813..5f854b0e44ad 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -557,11 +557,24 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE + mrs x2, id_aa64pfr1_el1 + ubfx x2, x2, #ID_AA64PFR1_NMI_SHIFT, #4 + cbz x2, .Lskip_nmi +.Linit_nmi: + mrs x2, id_aa64mmfr1_el1 + ubfx x2, x2, #ID_AA64MMFR1_HCX_SHIFT, #4 + cbz x2, .Lskip_nmi + + mrs_s x2, SYS_HCRX_EL2 + bic x2, x2, #HCRX_EL2_TALLINT_MASK // Don't trap ALLINT + msr_s SYS_HCRX_EL2, x2 + /* * Check for VHE being present. For the rest of the EL2 setup, * x2 being non-zero indicates that we do have VHE, and that the * kernel is intended to run at EL2. */ +.Lskip_nmi: mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else -- Gitee From 7e196967f1c937cbf70b43562d038c2c1d773343 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Aug 2025 13:03:47 +0800 Subject: [PATCH 7/8] arm64/cpufeature: Detect PE support for FEAT_NMI Use of FEAT_NMI requires that all the PEs in the system and the GIC have NMI support. This patch implements the PE part of that detection. In order to avoid problematic interactions between real and pseudo NMIs we disable the architected feature if the user has enabled pseudo NMIs on the command line. If this is done on a system where support for the architected feature is detected then a warning is printed during boot in order to help users spot what is likely to be a misconfiguration. In order to allow KVM to offer the feature to guests even if pseudo NMIs are in use by the host we have a separate feature for the raw feature which is used in KVM. 
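Condensed from the diff below, the core of the detection and enable
logic is roughly the following (comments abbreviated):

|	static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope)
|	{
|		if (!has_cpuid_feature(entry, scope))
|			return false;
|
|		/* Defer to pseudo NMIs if the user explicitly enabled them */
|		if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) {
|			pr_info("Pseudo NMI enabled, not using architected NMI\n");
|			return false;
|		}
|
|		return true;
|	}
|
|	static void nmi_enable(const struct arm64_cpu_capabilities *__unused)
|	{
|		/* Unmask ALLINT first, then select SCTLR_EL1.NMI over SPINTMASK */
|		_allint_clear();
|		sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI);
|		isb();
|	}

ARM64_HAS_NMI matches on the ID register alone (has_cpuid_feature),
while ARM64_USES_NMI goes through use_nmi() and gets nmi_enable() as
its cpu_enable hook.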
Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/include/asm/cpucaps.h | 4 +- arch/arm64/include/asm/cpufeature.h | 6 +++ arch/arm64/include/asm/sysreg.h | 5 +++ arch/arm64/kernel/cpufeature.c | 67 ++++++++++++++++++++++++++++- 4 files changed, 80 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 0da53f0c4fbf..67634c23e850 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -84,7 +84,9 @@ #define ARM64_HAS_XCALL 76 #define ARM64_HAS_XINT 77 #define ARM64_WORKAROUND_PHYTIUM_FT3386 78 +#define ARM64_HAS_NMI 79 +#define ARM64_USES_NMI 80 -#define ARM64_NCAPS 80 +#define ARM64_NCAPS 81 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index dd7d18cfbd1e..a35d66c468ed 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -776,6 +776,12 @@ static inline bool system_supports_mte(void) cpus_have_const_cap(ARM64_MTE); } +static __always_inline bool system_uses_nmi(void) +{ + return IS_ENABLED(CONFIG_ARM64_NMI) && + cpus_have_const_cap(ARM64_USES_NMI); +} + static inline bool system_has_prio_mask_debugging(void) { return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 61ffa2557cb4..bc42251806c8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1202,6 +1202,8 @@ #define SCTLR_EL1_BT1 (BIT(36)) #define SCTLR_EL1_BT0 (BIT(35)) +#define SCTLR_EL1_SPINTMASK (BIT(62)) +#define SCTLR_EL1_NMI (BIT(61)) #define SCTLR_EL1_UCI (BIT(26)) #define SCTLR_EL1_E0E (BIT(24)) #define SCTLR_EL1_SPAN (BIT(23)) @@ -1363,6 +1365,9 @@ #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 +#define ID_AA64PFR1_NMI_IMP_DEF 0x1 +#define ID_AA64PFR1_NMI_IMP_NI 0x0 + /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 #define ID_AA64ZFR0_F32MM_SHIFT 52 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9cb9a209b63a..a3cb23c2c40a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -90,6 +90,7 @@ #include #include #include +#include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static unsigned long elf_hwcap __read_mostly; @@ -254,6 +255,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_NMI_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -2054,9 +2056,11 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ -#ifdef CONFIG_ARM64_PSEUDO_NMI +#if IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) || IS_ENABLED(CONFIG_ARM64_NMI) bool enable_pseudo_nmi; +#endif +#ifdef CONFIG_ARM64_PSEUDO_NMI static int __init early_enable_pseudo_nmi(char *p) { return strtobool(p, &enable_pseudo_nmi); @@ -2097,6 +2101,41 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +#ifdef CONFIG_ARM64_NMI +static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return false; + + /* + * Having both real and pseudo NMIs enabled simultaneously is + * likely to cause confusion. 
Since pseudo NMIs must be + * enabled with an explicit command line option, if the user + * has set that option on a system with real NMIs for some + * reason assume they know what they're doing. + */ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) { + pr_info("Pseudo NMI enabled, not using architected NMI\n"); + return false; + } + + return true; +} + +static void nmi_enable(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Enable use of NMIs controlled by ALLINT, SPINTMASK should + * be clear by default but make it explicit that we are using + * this mode. Ensure that ALLINT is clear first in order to + * avoid leaving things masked. + */ + _allint_clear(); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + isb(); +} +#endif + static void elf_hwcap_fixup(void) { #ifdef CONFIG_ARM64_ERRATUM_1742098 @@ -2750,6 +2789,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_XINT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_xint_support, + + }, +#endif +#ifdef CONFIG_ARM64_NMI + { + .desc = "Non-maskable Interrupts present", + .capability = ARM64_HAS_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_NMI_IMP_DEF, + .matches = has_cpuid_feature, + }, + { + .desc = "Non-maskable Interrupts enabled", + .capability = ARM64_USES_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_NMI_IMP_DEF, + .matches = use_nmi, + .cpu_enable = nmi_enable, }, #endif {}, -- Gitee From 8614d2eefc1e0bb7515ee800319cd0109e42b88e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 1 Aug 2025 13:03:53 +0800 Subject: [PATCH 8/8] arm64/nmi: Add Kconfig for NMI Since NMI handling is in some fairly hot paths we provide a Kconfig option which allows support to be compiled out when not needed. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: Shi Yang --- arch/arm64/Kconfig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f1accb7355f..434d46214462 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1990,6 +1990,22 @@ config ARM64_EPAN if the cpu does not implement the feature. endmenu +menu "ARMv8.8 architectural features" + +config ARM64_NMI + bool "Enable support for Non-maskable Interrupts (NMI)" + default y + help + Non-maskable interrupts are an architecture and GIC feature + which allow the system to configure some interrupts to be + configured to have superpriority, allowing them to be handled + before other interrupts and masked for shorter periods of time. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. It will also be + disabled if pseudo NMIs are enabled at runtime. +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y -- Gitee