From 51aea45cda951838c9843cea1f16e765bca7403d Mon Sep 17 00:00:00 2001
From: Ruidong Tian
Date: Thu, 4 Dec 2025 11:24:50 +0800
Subject: [PATCH] anolis: introduce fault-tolerant access for DAX PFN metadata

ANBZ: #28470

In DAX-backed memory scenarios, an MCE triggered by reading struct page
metadata signifies a device-level failure rather than global system
instability. Unlike failures in core system memory, these errors are
localized to the extension hardware.

Implementing fault-tolerant access for these metadata structures allows
the kernel to gracefully isolate the failing device, minimizing the
"blast radius" of the hardware error and ensuring that the rest of the
system remains operational.

Reproducing and testing:
1. Inject an error into the PFN metadata.
2. mmap the DAX device and read from it.

Before applying this patch, the kernel panics:

  mce: [Hardware Error]: CPU 120: Machine Check Exception: f Bank 1: bd80000000100134
  mce: [Hardware Error]: RIP 10: {dax_set_mapping.isra.0+0xce/0x140}
  mce: [Hardware Error]: TSC ee24b9e2d5 ADDR b213398000 MISC 86 PPIN 6deeb6484732971d
  mce: [Hardware Error]: PROCESSOR 0:a06d1 TIME 1765336050 SOCKET 0 APIC b1 microcode 10003f3
  mce: [Hardware Error]: Run the above through 'mcelog --ascii'
  mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel
  Kernel panic - not syncing: Fatal local machine check

After applying this patch, the user application receives SIGBUS and the
system stays alive.

Signed-off-by: Ruidong Tian
---
 drivers/dax/dax-private.h | 29 +++++++++++++++++++++++++++++
 drivers/dax/device.c      | 31 +++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 27cf2daaaa79..6f2c48335d0d 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -114,4 +114,33 @@ static inline bool dax_align_valid(unsigned long align)
 	return align == PAGE_SIZE;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#include <linux/uaccess.h>
+
+/*
+ * dax_test_page_mc - probe a struct page for a pending memory error
+ * @page: page metadata to probe
+ *
+ * Reads the whole struct page through copy_mc_to_kernel() so that a
+ * machine check raised by poisoned metadata is consumed here instead of
+ * by a later plain load.
+ *
+ * Returns 0 if the metadata could be read, non-zero otherwise.  When the
+ * architecture has no machine-check-safe copy the probe is compiled out
+ * and always returns 0.
+ */
+#ifdef copy_mc_to_kernel
+static inline int dax_test_page_mc(const struct page *page)
+{
+	struct page scratch;
+
+	/* copy_mc_to_kernel() returns the number of bytes left uncopied. */
+	return copy_mc_to_kernel(&scratch, page, sizeof(scratch)) != 0;
+}
+#else
+static inline int dax_test_page_mc(const struct page *page)
+{
+	return 0;
+}
+#endif
 #endif
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index cfb122b3fee3..63ee2c53a921 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -73,7 +73,7 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 	return -1;
 }
 
-static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
+static int dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
 			      unsigned long fault_size)
 {
 	unsigned long i, nr_pages = fault_size / PAGE_SIZE;
@@ -88,6 +88,24 @@ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
 	pgoff = linear_page_index(vmf->vma,
 			ALIGN_DOWN(vmf->address, fault_size));
 
+	/*
+	 * Probe the page metadata with a machine-check-safe read before it
+	 * is dereferenced below, so poisoned metadata fails this fault
+	 * instead of taking down the kernel.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		struct page *p = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+		struct page *head;
+
+		if (dax_test_page_mc(p))
+			return -EFAULT;
+
+		/* @p was readable, so its head pointer can be followed. */
+		head = compound_head(p);
+		if (head != p && dax_test_page_mc(head))
+			return -EFAULT;
+	}
+
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
 
@@ -98,6 +116,8 @@ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
 		page->mapping = filp->f_mapping;
 		page->index = pgoff + i;
 	}
+
+	return 0;
 }
 
 static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
@@ -128,7 +148,8 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
 
 	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
 
-	dax_set_mapping(vmf, pfn, fault_size);
+	if (dax_set_mapping(vmf, pfn, fault_size))
+		return VM_FAULT_SIGBUS;
 
 	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
 }
@@ -171,7 +192,8 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 
 	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
 
-	dax_set_mapping(vmf, pfn, fault_size);
+	if (dax_set_mapping(vmf, pfn, fault_size))
+		return VM_FAULT_SIGBUS;
 
 	return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
@@ -216,7 +238,8 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 
 	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
 
-	dax_set_mapping(vmf, pfn, fault_size);
+	if (dax_set_mapping(vmf, pfn, fault_size))
+		return VM_FAULT_SIGBUS;
 
 	return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
-- 
Gitee