diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 7552dbc1cc54c76d9b6ea3b6983d9ee800453d84..92cf9645c3d116b89280d836cdeb38aae7bd808e 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -270,6 +270,12 @@ Before jumping into the kernel, the following conditions must be met:
       having 0b1 set for the corresponding bit for each of the auxiliary
       counters present.
 
+  For CPUs with Non-maskable Interrupts (FEAT_NMI):
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.TALLINT must be initialised to 0b0.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 21f2ec96cc96c5707161373e150f3ad0e1963ba3..340352c772521163a52ba258d674d59468cd4655 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void)		\
 	return read_sysreg(a32);		\
 }						\
 
+CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1)
 CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
 CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
 CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
@@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
 
 /* Low-level accessors */
 
-static inline void gic_write_eoir(u32 irq)
-{
-	write_sysreg(irq, ICC_EOIR1);
-	isb();
-}
-
 static inline void gic_write_dir(u32 val)
 {
 	write_sysreg(val, ICC_DIR);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0f1accb7355f18ba2e2f5024b498957b17333d17..434d462144624598629546712163d154569707de 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1990,6 +1990,22 @@ config ARM64_EPAN
 	  if the cpu does not implement the feature.
 endmenu
 
+menu "ARMv8.8 architectural features"
+
+config ARM64_NMI
+	bool "Enable support for Non-maskable Interrupts (NMI)"
+	default y
+	help
+	  Non-maskable interrupts are an architecture and GIC feature
+	  which allow the system to configure some interrupts to have
+	  superpriority, allowing them to be handled before other
+	  interrupts and masked for shorter periods of time.
+
+	  The feature is detected at runtime, and will remain disabled
+	  if the cpu does not implement the feature. It will also be
+	  disabled if pseudo NMIs are enabled at runtime.
+endmenu
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 63db8e5ec9f8a915a7cbddfddaba6b4e08049d48..cc71a8a7bf9d68e6dd62ea261a6113990517fb83 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -32,6 +32,22 @@
 	wx\n	.req	w\n
 	.endr
 
+	.macro	disable_allint
+#ifdef CONFIG_ARM64_NMI
+alternative_if ARM64_HAS_NMI
+	msr_s	SYS_ALLINT_SET, xzr
+alternative_else_nop_endif
+#endif
+	.endm
+
+	.macro	enable_allint
+#ifdef CONFIG_ARM64_NMI
+alternative_if ARM64_HAS_NMI
+	msr_s	SYS_ALLINT_CLR, xzr
+alternative_else_nop_endif
+#endif
+	.endm
+
 	.macro	save_and_disable_daif, flags
 	mrs	\flags, daif
 	msr	daifset, #0xf
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 0da53f0c4fbf645576af228432e216b40736f03d..67634c23e85059b46d03a99db16c3e12313adaeb 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -84,7 +84,9 @@
 #define ARM64_HAS_XCALL			76
 #define ARM64_HAS_XINT			77
 #define ARM64_WORKAROUND_PHYTIUM_FT3386	78
+#define ARM64_HAS_NMI			79
+#define ARM64_USES_NMI			80
 
-#define ARM64_NCAPS			80
+#define ARM64_NCAPS			81
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index dd7d18cfbd1e4dae89e2e8131979718d4b4abff4..a35d66c468eddb681f716a20f792f6354406974c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -776,6 +776,12 @@ static inline bool system_supports_mte(void)
 		cpus_have_const_cap(ARM64_MTE);
 }
 
+static __always_inline bool system_uses_nmi(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_NMI) &&
+		cpus_have_const_cap(ARM64_USES_NMI);
+}
+
 static inline bool system_has_prio_mask_debugging(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index cfdde3a56805959b8c074455ef7eff783a1ad2d2..281b83efd2a21b5dbd5fd9f470f5d5d7d90660d1 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -17,6 +17,15 @@
 #define DAIF_ERRCTX	(PSR_I_BIT | PSR_A_BIT)
 #define DAIF_MASK	(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
 
+static __always_inline void _allint_clear(void)
+{
+	asm volatile(__msr_s(SYS_ALLINT_CLR, "xzr"));
+}
+
+static __always_inline void _allint_set(void)
+{
+	asm volatile(__msr_s(SYS_ALLINT_SET, "xzr"));
+}
 /* mask/save/unmask/restore all exceptions, including interrupts.
  */
 static inline void local_daif_mask(void)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 9705f7abd42871475d95408396aee49b8c9f34b2..bc42251806c881d370ef836f5a30f39f1f5ddeed 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -133,6 +133,8 @@
 #define SYS_SVCR_SMSTART_SM_EL0		sys_reg(0, 3, 4, 3, 3)
 #define SYS_SVCR_SMSTOP_SMZA_EL0	sys_reg(0, 3, 4, 6, 3)
 
+#define SYS_ALLINT_CLR			sys_reg(0, 1, 4, 0, 0)
+#define SYS_ALLINT_SET			sys_reg(0, 1, 4, 1, 0)
 #define SYS_OSDTRRX_EL1			sys_reg(2, 0, 0, 0, 2)
 #define SYS_MDCCINT_EL1			sys_reg(2, 0, 0, 2, 0)
 #define SYS_MDSCR_EL1			sys_reg(2, 0, 0, 2, 2)
@@ -361,6 +363,8 @@
 #define SYS_PAR_EL1_F			BIT(0)
 #define SYS_PAR_EL1_FST			GENMASK(6, 1)
 
+#define HCRX_EL2_TALLINT_MASK		GENMASK(6, 6)
+
 /*** Statistical Profiling Extension ***/
 #define SMPRI_EL1_PRIORITY_MASK		0xf
@@ -1198,6 +1202,8 @@
 #define SCTLR_EL1_BT1		(BIT(36))
 #define SCTLR_EL1_BT0		(BIT(35))
+#define SCTLR_EL1_SPINTMASK	(BIT(62))
+#define SCTLR_EL1_NMI		(BIT(61))
 #define SCTLR_EL1_UCI		(BIT(26))
 #define SCTLR_EL1_E0E		(BIT(24))
 #define SCTLR_EL1_SPAN		(BIT(23))
@@ -1341,6 +1347,7 @@
 #define ID_AA64PFR0_EL0_32BIT_64BIT	0x2
 
 /* id_aa64pfr1 */
+#define ID_AA64PFR1_NMI_SHIFT		36
 #define ID_AA64PFR1_SME_SHIFT		24
 #define ID_AA64PFR1_MPAMFRAC_SHIFT	16
 #define ID_AA64PFR1_RASFRAC_SHIFT	12
@@ -1358,6 +1365,9 @@
 #define ID_AA64PFR1_MTE_EL0		0x1
 #define ID_AA64PFR1_MTE			0x2
 
+#define ID_AA64PFR1_NMI_IMP_DEF		0x1
+#define ID_AA64PFR1_NMI_IMP_NI		0x0
+
 /* id_aa64zfr0 */
 #define ID_AA64ZFR0_F64MM_SHIFT		56
 #define ID_AA64ZFR0_F32MM_SHIFT		52
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9cb9a209b63ae401c30d92a027bc5550e1207e91..a3cb23c2c40addcb25b9c77e9e20be356534a4a5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -90,6 +90,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Kernel representation of AT_HWCAP and AT_HWCAP2 */
 static unsigned long elf_hwcap __read_mostly;
@@ -254,6 +255,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_NMI_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -2054,9 +2056,11 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
 }
 #endif /* CONFIG_ARM64_E0PD */
-#ifdef CONFIG_ARM64_PSEUDO_NMI
+#if IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) || IS_ENABLED(CONFIG_ARM64_NMI)
 bool enable_pseudo_nmi;
+#endif
 
+#ifdef CONFIG_ARM64_PSEUDO_NMI
 static int __init early_enable_pseudo_nmi(char *p)
 {
 	return strtobool(p, &enable_pseudo_nmi);
 }
@@ -2097,6 +2101,41 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 }
 #endif /* CONFIG_ARM64_MTE */
 
+#ifdef CONFIG_ARM64_NMI
+static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	if (!has_cpuid_feature(entry, scope))
+		return false;
+
+	/*
+	 * Having both real and pseudo NMIs enabled simultaneously is
+	 * likely to cause confusion. Since pseudo NMIs must be
+	 * enabled with an explicit command line option, if the user
+	 * has set that option on a system with real NMIs for some
+	 * reason assume they know what they're doing.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) {
+		pr_info("Pseudo NMI enabled, not using architected NMI\n");
+		return false;
+	}
+
+	return true;
+}
+
+static void nmi_enable(const struct arm64_cpu_capabilities *__unused)
+{
+	/*
+	 * Enable use of NMIs controlled by ALLINT. SPINTMASK should
+	 * be clear by default but make it explicit that we are using
+	 * this mode. Ensure that ALLINT is clear first in order to
+	 * avoid leaving things masked.
+	 */
+	_allint_clear();
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI);
+	isb();
+}
+#endif
+
 static void elf_hwcap_fixup(void)
 {
 #ifdef CONFIG_ARM64_ERRATUM_1742098
@@ -2750,6 +2789,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_XINT,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_xint_support,
+
+	},
+#endif
+#ifdef CONFIG_ARM64_NMI
+	{
+		.desc = "Non-maskable Interrupts present",
+		.capability = ARM64_HAS_NMI,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.sys_reg = SYS_ID_AA64PFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR1_NMI_SHIFT,
+		.field_width = 4,
+		.min_field_value = ID_AA64PFR1_NMI_IMP_DEF,
+		.matches = has_cpuid_feature,
+	},
+	{
+		.desc = "Non-maskable Interrupts enabled",
+		.capability = ARM64_USES_NMI,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.sys_reg = SYS_ID_AA64PFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR1_NMI_SHIFT,
+		.field_width = 4,
+		.min_field_value = ID_AA64PFR1_NMI_IMP_DEF,
+		.matches = use_nmi,
+		.cpu_enable = nmi_enable,
 	},
 #endif
 	{},
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4704d00d38131f01a38bfe392a203c389b86452d..5f854b0e44ade339d03f00383f45c7d334bb8ea6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -557,11 +557,24 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	msr	sctlr_el2, x0
 
 #ifdef CONFIG_ARM64_VHE
+	mrs	x2, id_aa64pfr1_el1
+	ubfx	x2, x2, #ID_AA64PFR1_NMI_SHIFT, #4
+	cbz	x2, .Lskip_nmi
+.Linit_nmi:
+	mrs	x2, id_aa64mmfr1_el1
+	ubfx	x2, x2, #ID_AA64MMFR1_HCX_SHIFT, #4
+	cbz	x2, .Lskip_nmi
+
+	mrs_s	x2, SYS_HCRX_EL2
+	bic	x2, x2, #HCRX_EL2_TALLINT_MASK	// Don't trap ALLINT
+	msr_s	SYS_HCRX_EL2, x2
+
 	/*
 	 * Check for VHE being present. For the rest of the EL2 setup,
 	 * x2 being non-zero indicates that we do have VHE, and that the
 	 * kernel is intended to run at EL2.
 	 */
+.Lskip_nmi:
 	mrs	x2, id_aa64mmfr1_el1
 	ubfx	x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
 #else
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 39a854ec93cac5016088f3e6905ff2580136b633..b71cfb7a0421b82a998b890cdf80232f3595e5da 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -543,7 +543,8 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
 
 static void gic_eoi_irq(struct irq_data *d)
 {
-	gic_write_eoir(gic_irq(d));
+	write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
+	isb();
 }
 
 static void gic_eoimode1_eoi_irq(struct irq_data *d)
@@ -627,85 +628,99 @@ static void gic_deactivate_unhandled(u32 irqnr)
 		if (irqnr < 8192)
 			gic_write_dir(irqnr);
 	} else {
-		gic_write_eoir(irqnr);
+		write_gicreg(irqnr, ICC_EOIR1_EL1);
+		isb();
 	}
 }
 
-static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+/*
+ * Follow a read of the IAR with any HW maintenance that needs to happen prior
+ * to invoking the relevant IRQ handler. We must do two things:
+ *
+ * (1) Ensure instruction ordering between a read of IAR and subsequent
+ *     instructions in the IRQ handler using an ISB.
+ *
+ *     It is possible for the IAR to report an IRQ which was signalled *after*
+ *     the CPU took an IRQ exception as multiple interrupts can race to be
+ *     recognized by the GIC, earlier interrupts could be withdrawn, and/or
+ *     later interrupts could be prioritized by the GIC.
+ *
+ *     For devices which are tightly coupled to the CPU, such as PMUs, a
+ *     context synchronization event is necessary to ensure that system
+ *     register state is not stale, as these may have been indirectly written
+ *     *after* exception entry.
+ *
+ * (2) Deactivate the interrupt when EOI mode 1 is in use.
+ */
+static inline void gic_complete_ack(u32 irqnr)
 {
-	bool irqs_enabled = interrupts_enabled(regs);
-	int err;
+	if (static_branch_likely(&supports_deactivate_key))
+		write_gicreg(irqnr, ICC_EOIR1_EL1);
 
-	if (irqs_enabled)
-		nmi_enter();
+	isb();
+}
 
-	if (static_branch_likely(&supports_deactivate_key))
-		gic_write_eoir(irqnr);
-	else
-		isb();
+static bool gic_rpr_is_nmi_prio(void)
+{
+	if (!gic_supports_nmi())
+		return false;
 
-	/*
-	 * Leave the PSR.I bit set to prevent other NMIs to be
-	 * received while handling this one.
-	 * PSR.I will be restored when we ERET to the
-	 * interrupted context.
-	 */
-	err = handle_domain_nmi(gic_data.domain, irqnr, regs);
-	if (err)
-		gic_deactivate_unhandled(irqnr);
+	return unlikely(gic_read_rpr() == GICD_INT_NMI_PRI);
+}
 
-	if (irqs_enabled)
-		nmi_exit();
+static bool gic_irqnr_is_special(u32 irqnr)
+{
+	return irqnr >= 1020 && irqnr <= 1023;
 }
 
-static u32 do_read_iar(struct pt_regs *regs)
+static void __gic_handle_irq(u32 irqnr, struct pt_regs *regs)
 {
-	u32 iar;
+	if (gic_irqnr_is_special(irqnr))
+		return;
 
-	if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
-		u64 pmr;
+	gic_complete_ack(irqnr);
 
-		/*
-		 * We were in a context with IRQs disabled. However, the
-		 * entry code has set PMR to a value that allows any
-		 * interrupt to be acknowledged, and not just NMIs. This can
-		 * lead to surprising effects if the NMI has been retired in
-		 * the meantime, and that there is an IRQ pending. The IRQ
-		 * would then be taken in NMI context, something that nobody
-		 * wants to debug twice.
-		 *
-		 * Until we sort this, drop PMR again to a level that will
-		 * actually only allow NMIs before reading IAR, and then
-		 * restore it to what it was.
-		 */
-		pmr = gic_read_pmr();
-		gic_pmr_mask_irqs();
-		isb();
+	if (handle_domain_irq(gic_data.domain, irqnr, regs)) {
+		WARN_ONCE(true, "Unexpected interrupt received!\n");
+		gic_deactivate_unhandled(irqnr);
+	}
+}
 
-		iar = gic_read_iar();
+static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+{
+	if (gic_irqnr_is_special(irqnr))
+		return;
 
-		gic_write_pmr(pmr);
-	} else {
-		iar = gic_read_iar();
-	}
+	gic_complete_ack(irqnr);
 
-	return iar;
+	if (handle_domain_nmi(gic_data.domain, irqnr, regs))
+		gic_deactivate_unhandled(irqnr);
 }
 
-static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+/*
+ * An exception has been taken from a context with IRQs enabled, and this could
+ * be an IRQ or an NMI.
+ *
+ * The entry code called us with DAIF.IF set to keep NMIs masked. We must clear
+ * DAIF.IF (and update ICC_PMR_EL1 to mask regular IRQs) prior to returning,
+ * after handling any NMI but before handling any IRQ.
+ *
+ * The entry code has performed IRQ entry, and if an NMI is detected we must
+ * perform NMI entry/exit around invoking the handler.
+ */
+static void __gic_handle_irq_from_irqson(struct pt_regs *regs)
 {
+	bool is_nmi;
 	u32 irqnr;
 
-	irqnr = do_read_iar(regs);
+	irqnr = gic_read_iar();
 
-	/* Check for special IDs first */
-	if ((irqnr >= 1020 && irqnr <= 1023))
-		return;
+	is_nmi = gic_rpr_is_nmi_prio();
 
-	if (gic_supports_nmi() &&
-	    unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI))) {
-		gic_handle_nmi(irqnr, regs);
-		return;
+	if (is_nmi) {
+		nmi_enter();
+		__gic_handle_nmi(irqnr, regs);
+		nmi_exit();
 	}
 
 	if (gic_prio_masking_enabled()) {
@@ -713,15 +728,52 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 		gic_arch_enable_irqs();
 	}
 
-	if (static_branch_likely(&supports_deactivate_key))
-		gic_write_eoir(irqnr);
-	else
-		isb();
+	if (!is_nmi)
+		__gic_handle_irq(irqnr, regs);
+}
 
-	if (handle_domain_irq(gic_data.domain, irqnr, regs)) {
-		WARN_ONCE(true, "Unexpected interrupt received!\n");
-		gic_deactivate_unhandled(irqnr);
-	}
+/*
+ * An exception has been taken from a context with IRQs disabled, which can only
+ * be an NMI.
+ *
+ * The entry code called us with DAIF.IF set to keep NMIs masked. We must leave
+ * DAIF.IF (and ICC_PMR_EL1) unchanged.
+ *
+ * The entry code has performed NMI entry.
+ */
+static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs)
+{
+	u64 pmr;
+	u32 irqnr;
+
+	/*
+	 * We were in a context with IRQs disabled. However, the
+	 * entry code has set PMR to a value that allows any
+	 * interrupt to be acknowledged, and not just NMIs. This can
+	 * lead to surprising effects if the NMI has been retired in
+	 * the meantime, and there is an IRQ pending. The IRQ
+	 * would then be taken in NMI context, something that nobody
+	 * wants to debug twice.
+	 *
+	 * Until we sort this, drop PMR again to a level that will
+	 * actually only allow NMIs before reading IAR, and then
+	 * restore it to what it was.
+	 */
+	pmr = gic_read_pmr();
+	gic_pmr_mask_irqs();
+	isb();
+	irqnr = gic_read_iar();
+	gic_write_pmr(pmr);
+
+	__gic_handle_nmi(irqnr, regs);
+}
+
+static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+{
+	if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs)))
+		__gic_handle_irq_from_irqsoff(regs);
+	else
+		__gic_handle_irq_from_irqson(regs);
 }
 
 #ifdef CONFIG_FAST_IRQ
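
Not part of the patch: the sketch below only illustrates how the _allint_set()/_allint_clear() helpers and the ARM64_USES_NMI capability introduced above are intended to be paired by later callers. The wrapper names local_nmi_mask() and local_nmi_unmask() are hypothetical and not defined anywhere in this series; they simply show the pattern of gating any ALLINT access on system_uses_nmi() so that CPUs without (or not using) FEAT_NMI are left untouched.

/*
 * Illustrative only (not part of this patch): hypothetical wrappers showing
 * how a caller would combine system_uses_nmi() with the new ALLINT helpers.
 */
#include <asm/cpufeature.h>
#include <asm/daifflags.h>

static inline void local_nmi_mask(void)
{
	/* Set PSTATE.ALLINT: superpriority (NMI) interrupts are masked too. */
	if (system_uses_nmi())
		_allint_set();
}

static inline void local_nmi_unmask(void)
{
	/* Clear PSTATE.ALLINT: superpriority (NMI) interrupts are allowed again. */
	if (system_uses_nmi())
		_allint_clear();
}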