From 5477d1088719603ee0ab2993f9870e7b475f9161 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Fri, 10 Feb 2023 17:43:29 +0000 Subject: [PATCH 1/2] [Assignment Tracking] Fix migrateDebuginfo in SROA Without this patch, migrateDebugInfo doesn't understand how to handle existing fragments that are smaller than the to-be-split store. This can occur if. e.g. a vector store (1 dbg.assign) is split (many dbg.assigns - 1 fragment for each scalar) and later those stores are re-vectorized (many dbg.assigns), and then SROA runs on that. The approach taken in this patch is to drop intrinsics with fragments outside of the slice. For example, starting with: store <2 x float> %v, ptr %dest !DIAssignID !1 call void @llvm.dbg.assign(..., DIExpression(DW_OP_LLVM_fragment, 0, 32), !1, ...) call void @llvm.dbg.assign(..., DIExpression(DW_OP_LLVM_fragment, 32, 32), !1, ...) When visiting the slice of bits 0 to 31 we get: store float %v.extract.0, ptr %dest !DIAssignID !2 call void @llvm.dbg.assign(..., DIExpression(DW_OP_LLVM_fragment, 0, 32), !2, ...) The other dbg.assign associated with the currently-split store is dropped for this split part. And visiting bits 32 to 63 we get the following: store float %v.extract.1, ptr %adjusted.dest !DIAssignID !3 call void @llvm.dbg.assign(..., DIExpression(DW_OP_LLVM_fragment, 32, 32), !3, ...) I've added two tests that cover this case. Implementing this meant re-writing the fragment-calculation part of migrateDebugInfo to work with the absolute offset of the new slice in terms of the base alloca (instead of the offset of the slice into the new alloca), the fragment (if any) of the variable associated with the base alloca, and the fragment associated with the split store. Because we need the offset into the base alloca for the variables being split, some careful wiring is required for memory intrinsics due to the fact that memory intrinsics can be split when either the source or dest allocas are split. In the case where the source alloca drives the splitting, we need to be careful to pass migrateDebugInfo the information in relation to the dest alloca. Reviewed By: StephenTozer Differential Revision: https://reviews.llvm.org/D143146 --- llvm/include/llvm/IR/DebugInfoMetadata.h | 5 + llvm/lib/Transforms/Scalar/SROA.cpp | 204 +++++++++++++----- .../sroa/split-pre-fragmented-store-2.ll | 113 ++++++++++ .../sroa/split-pre-fragmented-store.ll | 106 +++++++++ 4 files changed, 371 insertions(+), 57 deletions(-) create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index def3ce2b56ea..cedac2838e88 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -2773,6 +2773,11 @@ public: struct FragmentInfo { uint64_t SizeInBits; uint64_t OffsetInBits; + /// Return the index of the first bit of the fragment. + uint64_t startInBits() const { return OffsetInBits; } + /// Return the index of the bit after the end of the fragment, e.g. for + /// fragment offset=16 and size=32 return their sum, 48. + uint64_t endInBits() const { return OffsetInBits + SizeInBits; } }; /// Retrieve the details of this fragment expression. diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 818d64725893..877c07ac36e9 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -118,13 +118,65 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates"); /// GEPs. static cl::opt SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden); +/// Disable running mem2reg during SROA in order to test or debug SROA. +static cl::opt SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), + cl::Hidden); namespace { + +/// Calculate the fragment of a variable to use when slicing a store +/// based on the slice dimensions, existing fragment, and base storage +/// fragment. +/// Note that a returned value of std::nullopt indicates that there is +/// no appropriate fragment available (rather than meaning use the whole +/// variable, which is a common usage). Because the store is being sliced +/// we always expect a fragment - there's never a case where the whole +/// variable should be used. +static std::optional +calculateFragment(uint64_t NewStorageSliceOffsetInBits, + uint64_t NewStorageSliceSizeInBits, + std::optional StorageFragment, + std::optional CurrentFragment) { + DIExpression::FragmentInfo Target; + // If the base storage describes part of the variable apply the offset and + // the size constraint. + if (StorageFragment) { + Target.SizeInBits = + std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits); + Target.OffsetInBits = + NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits; + } else { + Target.SizeInBits = NewStorageSliceSizeInBits; + Target.OffsetInBits = NewStorageSliceOffsetInBits; + } + + // No additional work to do if there isn't a fragment already, or there is + // but it already exactly describes the new assignment. + if (!CurrentFragment || *CurrentFragment == Target) + return Target; + + // Reject the target fragment if it doesn't fit wholly within the current + // fragment. TODO: We could instead chop up the target to fit in the case of + // a partial overlap. + if (Target.startInBits() < CurrentFragment->startInBits() || + Target.endInBits() > CurrentFragment->endInBits()) + return std::nullopt; + + // Target fits within the current fragment, return it. + return Target; +} + +static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) { + return DebugVariable(DVI->getVariable(), std::nullopt, + DVI->getDebugLoc().getInlinedAt()); +} + /// Find linked dbg.assign and generate a new one with the correct /// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the /// value component is copied from the old dbg.assign to the new. /// \param OldAlloca Alloca for the variable before splitting. -/// \param RelativeOffsetInBits Offset into \p OldAlloca relative to the -/// offset prior to splitting (change in offset). +/// \param IsSplit True if the store (not necessarily alloca) +/// is being split. +/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca. /// \param SliceSizeInBits New number of bits being written to. /// \param OldInst Instruction that is being split. /// \param Inst New instruction performing this part of the @@ -132,8 +184,8 @@ namespace { /// \param Dest Store destination. /// \param Value Stored value. /// \param DL Datalayout. -static void migrateDebugInfo(AllocaInst *OldAlloca, - uint64_t RelativeOffsetInBits, +static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, + uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL) { @@ -144,7 +196,9 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, LLVM_DEBUG(dbgs() << " migrateDebugInfo\n"); LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n"); - LLVM_DEBUG(dbgs() << " RelativeOffset: " << RelativeOffsetInBits << "\n"); + LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n"); + LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits + << "\n"); LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n"); LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n"); LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n"); @@ -152,13 +206,19 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, if (Value) LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n"); + /// Map of aggregate variables to their fragment associated with OldAlloca. + DenseMap> + BaseFragments; + for (auto *DAI : at::getAssignmentMarkers(OldAlloca)) + BaseFragments[getAggregateVariable(DAI)] = + DAI->getExpression()->getFragmentInfo(); + // The new inst needs a DIAssignID unique metadata tag (if OldInst has // one). It shouldn't already have one: assert this assumption. assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID)); DIAssignID *NewID = nullptr; auto &Ctx = Inst->getContext(); DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false); - uint64_t AllocaSizeInBits = *OldAlloca->getAllocationSizeInBits(DL); assert(OldAlloca->isStaticAlloca()); for (DbgAssignIntrinsic *DbgAssign : MarkerRange) { @@ -166,30 +226,38 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, << "\n"); auto *Expr = DbgAssign->getExpression(); - // Check if the dbg.assign already describes a fragment. - auto GetCurrentFragSize = [AllocaSizeInBits, DbgAssign, - Expr]() -> uint64_t { - if (auto FI = Expr->getFragmentInfo()) - return FI->SizeInBits; - if (auto VarSize = DbgAssign->getVariable()->getSizeInBits()) - return *VarSize; - // The variable type has an unspecified size. This can happen in the - // case of DW_TAG_unspecified_type types, e.g. std::nullptr_t. Because - // there is no fragment and we do not know the size of the variable type, - // we'll guess by looking at the alloca. - return AllocaSizeInBits; - }; - uint64_t CurrentFragSize = GetCurrentFragSize(); - bool MakeNewFragment = CurrentFragSize != SliceSizeInBits; - assert(MakeNewFragment || RelativeOffsetInBits == 0); - - assert(SliceSizeInBits <= AllocaSizeInBits); - if (MakeNewFragment) { - assert(RelativeOffsetInBits + SliceSizeInBits <= CurrentFragSize); - auto E = DIExpression::createFragmentExpression( - Expr, RelativeOffsetInBits, SliceSizeInBits); - assert(E && "Failed to create fragment expr!"); - Expr = *E; + if (IsSplit) { + std::optional BaseFragment = std::nullopt; + { + auto R = BaseFragments.find(getAggregateVariable(DbgAssign)); + if (R == BaseFragments.end()) + continue; + BaseFragment = R->second; + } + std::optional CurrentFragment = + Expr->getFragmentInfo(); + std::optional NewFragment = + calculateFragment(OldAllocaOffsetInBits, SliceSizeInBits, + BaseFragment, CurrentFragment); + // Note that std::nullopt here means "skip this fragment" rather than + // "there is no fragment / use the whole variable". + if (!NewFragment) + continue; + + if (!(NewFragment == CurrentFragment)) { + if (CurrentFragment) { + // Rewrite NewFragment to be relative to the existing one (this is + // what createFragmentExpression wants). CalculateFragment has + // already resolved the size for us. FIXME: Should it return the + // relative fragment too? + NewFragment->OffsetInBits -= CurrentFragment->OffsetInBits; + } + + auto E = DIExpression::createFragmentExpression( + Expr, NewFragment->OffsetInBits, NewFragment->SizeInBits); + assert(E && "Failed to create fragment expr!"); + Expr = *E; + } } // If we haven't created a DIAssignID ID do that now and attach it to Inst. @@ -2556,7 +2624,6 @@ class llvm::sroa::AllocaSliceRewriter // original alloca. uint64_t NewBeginOffset = 0, NewEndOffset = 0; - uint64_t RelativeOffset = 0; uint64_t SliceSize = 0; bool IsSplittable = false; bool IsSplit = false; @@ -2630,14 +2697,13 @@ public: NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset); NewEndOffset = std::min(EndOffset, NewAllocaEndOffset); - RelativeOffset = NewBeginOffset - BeginOffset; SliceSize = NewEndOffset - NewBeginOffset; LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset << ") NewBegin:(" << NewBeginOffset << ", " << NewEndOffset << ") NewAllocaBegin:(" << NewAllocaBeginOffset << ", " << NewAllocaEndOffset << ")\n"); - assert(IsSplit || RelativeOffset == 0); + assert(IsSplit || NewBeginOffset == BeginOffset); OldUse = I->getUse(); OldPtr = cast(OldUse->get()); @@ -2900,8 +2966,8 @@ private: Pass.DeadInsts.push_back(&SI); // NOTE: Careful to use OrigV rather than V. - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, Store, - Store->getPointerOperand(), OrigV, DL); + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, + Store, Store->getPointerOperand(), OrigV, DL); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } @@ -2925,8 +2991,9 @@ private: if (AATags) Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, Store, - Store->getPointerOperand(), Store->getValueOperand(), DL); + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, + Store, Store->getPointerOperand(), + Store->getValueOperand(), DL); Pass.DeadInsts.push_back(&SI); LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); @@ -3004,8 +3071,9 @@ private: if (NewSI->isAtomic()) NewSI->setAlignment(SI.getAlign()); - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, NewSI, - NewSI->getPointerOperand(), NewSI->getValueOperand(), DL); + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, + NewSI, NewSI->getPointerOperand(), + NewSI->getValueOperand(), DL); Pass.DeadInsts.push_back(&SI); deleteIfTriviallyDead(OldOp); @@ -3105,8 +3173,8 @@ private: if (AATags) New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New, - New->getRawDest(), nullptr, DL); + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, + New, New->getRawDest(), nullptr, DL); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; @@ -3181,8 +3249,8 @@ private: if (AATags) New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New, - New->getPointerOperand(), V, DL); + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, + New, New->getPointerOperand(), V, DL); LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); @@ -3310,8 +3378,16 @@ private: if (AATags) New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New, - DestPtr, nullptr, DL); + APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0); + if (IsDest) { + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, + &II, New, DestPtr, nullptr, DL); + } else if (AllocaInst *Base = dyn_cast( + DestPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /*AllowNonInbounds*/ true))) { + migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, + SliceSize * 8, &II, New, DestPtr, nullptr, DL); + } LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -3399,8 +3475,18 @@ private: if (AATags) Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); - migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, Store, - DstPtr, Src, DL); + APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); + if (IsDest) { + + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, + Store, DstPtr, Src, DL); + } else if (AllocaInst *Base = dyn_cast( + DstPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /*AllowNonInbounds*/ true))) { + migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8, + &II, Store, DstPtr, Src, DL); + } + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); } @@ -3762,23 +3848,22 @@ private: APInt Offset( DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); - if (AATags && - GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) + GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset); + if (AATags) Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); // migrateDebugInfo requires the base Alloca. Walk to it from this gep. // If we cannot (because there's an intervening non-const or unbounded // gep) then we wouldn't expect to see dbg.assign intrinsics linked to // this instruction. - APInt OffsetInBytes(DL.getTypeSizeInBits(Ptr->getType()), false); - Value *Base = InBoundsGEP->stripAndAccumulateInBoundsConstantOffsets( - DL, OffsetInBytes); + Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets(); if (auto *OldAI = dyn_cast(Base)) { uint64_t SizeInBits = DL.getTypeSizeInBits(Store->getValueOperand()->getType()); - migrateDebugInfo(OldAI, OffsetInBytes.getZExtValue() * 8, SizeInBits, - AggStore, Store, Store->getPointerOperand(), - Store->getValueOperand(), DL); + migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8, + SizeInBits, AggStore, Store, + Store->getPointerOperand(), Store->getValueOperand(), + DL); } else { assert(at::getAssignmentMarkers(Store).empty() && "AT: unexpected debug.assign linked to store through " @@ -5109,8 +5194,13 @@ bool SROAPass::promoteAllocas(Function &F) { NumPromoted += PromotableAllocas.size(); - LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC); + if (SROASkipMem2Reg) { + LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n"); + } else { + LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); + PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC); + } + PromotableAllocas.clear(); return true; } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll new file mode 100644 index 000000000000..e0c16531f836 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=sroa -sroa-skip-mem2reg %s \ +; RUN: | FileCheck %s --implicit-check-not="call void @llvm.dbg" + +;; NOTE: This is the same as split-pre-fragmented-store.ll except the base +;; alloca's dbg.assign has been altered to contain a fragment of the full +;; variable - the variable's size has been modified from 64 to 96 bits. +;; This version of the test ensures that the behaviour being tested is still +;; correct when the base alloca doesn't hold an entire variable. + +;; IR hand-modified, originally generated from: +;; struct Pair { int a; int b; }; +;; Pair getVar(); +;; int fun() { +;; Pair var; +;; var = getVar(); +;; return var.b; +;; } +;; Modification: split the dbg.assign linked the the memcpy(64 bits) into two, +;; each describing a 32 bit fragment. +;; +;; Check that assignment tracking updates in SROA work when the store being +;; split is described with one dbg.assign (covering different fragments). The +;; store may have been already split and then merged again at some point. + +;; Alloca for var.a and associated dbg.assign: + +;; Alloca for var.b and associated dbg.assign: + +;; Store to var.b (split from store to var) and associated dbg.assigns. The +;; dbg.assign for the fragment covering the (pre-split) assignment to var.a +;; should not be linked to the store. + + +%struct.Tuple = type { i32, i32, i32 } + +define dso_local noundef i32 @_Z3funv() !dbg !9 { +; CHECK-LABEL: @_Z3funv( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR_SROA_0:%.*]] = alloca i32, align 4, !DIAssignID [[DIASSIGNID20:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata [[META14:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata [[DIASSIGNID20]], metadata ptr [[VAR_SROA_0]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]] +; CHECK-NEXT: [[VAR_SROA_1:%.*]] = alloca i32, align 4, !DIAssignID [[DIASSIGNID22:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata [[META14]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata [[DIASSIGNID22]], metadata ptr [[VAR_SROA_1]], metadata !DIExpression()), !dbg [[DBG21]] +; CHECK-NEXT: [[REF_TMP_SROA_0:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[CALL:%.*]] = call i64 @_Z6getVarv(), !dbg [[DBG23:![0-9]+]] +; CHECK-NEXT: store i64 [[CALL]], ptr [[REF_TMP_SROA_0]], align 8, !dbg [[DBG23]] +; CHECK-NEXT: [[REF_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[REF_TMP_SROA_0]], align 8, !dbg [[DBG24:![0-9]+]] +; CHECK-NEXT: [[VAR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[REF_TMP_SROA_0_0_COPYLOAD]] to i32, !dbg [[DBG24]] +; CHECK-NEXT: store i32 [[VAR_SROA_0_0_EXTRACT_TRUNC]], ptr [[VAR_SROA_0]], align 4, !dbg [[DBG24]] +; CHECK-NEXT: [[VAR_SROA_1_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[REF_TMP_SROA_0_0_COPYLOAD]], 32, !dbg [[DBG24]] +; CHECK-NEXT: [[VAR_SROA_1_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[VAR_SROA_1_0_EXTRACT_SHIFT]] to i32, !dbg [[DBG24]] +; CHECK-NEXT: store i32 [[VAR_SROA_1_0_EXTRACT_TRUNC]], ptr [[VAR_SROA_1]], align 4, !dbg [[DBG24]], !DIAssignID [[DIASSIGNID25:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 [[VAR_SROA_1_0_EXTRACT_TRUNC]], metadata [[META14]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata [[DIASSIGNID25]], metadata ptr [[VAR_SROA_1]], metadata !DIExpression()), !dbg [[DBG21]] +; CHECK-NEXT: [[VAR_SROA_1_4_:%.*]] = load i32, ptr [[VAR_SROA_1]], align 4, !dbg [[DBG26:![0-9]+]] +; CHECK-NEXT: ret i32 [[VAR_SROA_1_4_]], !dbg [[DBG27:![0-9]+]] +; +entry: + %var = alloca [2 x i32], align 4, !DIAssignID !19 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 64), metadata !19, metadata ptr %var, metadata !DIExpression()), !dbg !20 + %ref.tmp = alloca %struct.Tuple, align 4 + %call = call i64 @_Z6getVarv(), !dbg !22 + store i64 %call, ptr %ref.tmp, align 4, !dbg !22 + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %var, ptr align 4 %ref.tmp, i64 8, i1 false), !dbg !23, !DIAssignID !29 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata !29, metadata ptr %var, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata !29, metadata ptr %var, metadata !DIExpression(DW_OP_plus, DW_OP_constu, 4)), !dbg !20 + %b = getelementptr inbounds %struct.Tuple, ptr %var, i32 0, i32 1, !dbg !31 + %0 = load i32, ptr %b, align 4, !dbg !31 + ret i32 %0, !dbg !35 +} + +declare !dbg !36 i64 @_Z6getVarv() +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 16.0.0"} +!9 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !1, file: !1, line: 3, type: !10, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14} +!14 = !DILocalVariable(name: "var", scope: !9, file: !1, line: 4, type: !15) +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Tuple", file: !1, line: 1, size: 96, flags: DIFlagTypePassByValue, elements: !16, identifier: "_ZTS4Pair") +!16 = !{!17, !18, !40} +!17 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !15, file: !1, line: 1, baseType: !12, size: 32) +!18 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !15, file: !1, line: 1, baseType: !12, size: 32, offset: 32) +!19 = distinct !DIAssignID() +!20 = !DILocation(line: 0, scope: !9) +!21 = !DILocation(line: 4, column: 3, scope: !9) +!22 = !DILocation(line: 5, column: 9, scope: !9) +!23 = !DILocation(line: 5, column: 7, scope: !9) +!29 = distinct !DIAssignID() +!30 = !DILocation(line: 5, column: 3, scope: !9) +!31 = !DILocation(line: 6, column: 14, scope: !9) +!34 = !DILocation(line: 7, column: 1, scope: !9) +!35 = !DILocation(line: 6, column: 3, scope: !9) +!36 = !DISubprogram(name: "getVar", linkageName: "_Z6getVarv", scope: !1, file: !1, line: 2, type: !37, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !39) +!37 = !DISubroutineType(types: !38) +!38 = !{!15} +!39 = !{} +!40 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !15, file: !1, line: 1, baseType: !12, size: 32, offset: 64) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll new file mode 100644 index 000000000000..05081020911d --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=sroa -sroa-skip-mem2reg %s \ +; RUN: | FileCheck %s --implicit-check-not="call void @llvm.dbg" + +;; IR hand-modified, originally generated from: +;; struct Pair { int a; int b; }; +;; Pair getVar(); +;; int fun() { +;; Pair var; +;; var = getVar(); +;; return var.b; +;; } +;; Modification: split the dbg.assign linked the the memcpy(64 bits) into two, +;; each describing a 32 bit fragment. +;; +;; Check that assignment tracking updates in SROA work when the store being +;; split is described with one dbg.assign (covering different fragments). The +;; store may have been already split and then merged again at some point. + +;; Alloca for var.a and associated dbg.assign: + +;; Alloca for var.b and associated dbg.assign: + +;; Store to var.b (split from store to var) and associated dbg.assigns. The +;; dbg.assign for the fragment covering the (pre-split) assignment to var.a +;; should not be linked to the store. + + +%struct.Pair = type { i32, i32 } + +define dso_local noundef i32 @_Z3funv() !dbg !9 { +; CHECK-LABEL: @_Z3funv( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR_SROA_0:%.*]] = alloca i32, align 4, !DIAssignID [[DIASSIGNID19:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata [[META14:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata [[DIASSIGNID19]], metadata ptr [[VAR_SROA_0]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] +; CHECK-NEXT: [[VAR_SROA_1:%.*]] = alloca i32, align 4, !DIAssignID [[DIASSIGNID21:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata [[META14]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata [[DIASSIGNID21]], metadata ptr [[VAR_SROA_1]], metadata !DIExpression()), !dbg [[DBG20]] +; CHECK-NEXT: [[REF_TMP_SROA_0:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[CALL:%.*]] = call i64 @_Z6getVarv(), !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: store i64 [[CALL]], ptr [[REF_TMP_SROA_0]], align 8, !dbg [[DBG22]] +; CHECK-NEXT: [[REF_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[REF_TMP_SROA_0]], align 8, !dbg [[DBG23:![0-9]+]] +; CHECK-NEXT: [[VAR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[REF_TMP_SROA_0_0_COPYLOAD]] to i32, !dbg [[DBG23]] +; CHECK-NEXT: store i32 [[VAR_SROA_0_0_EXTRACT_TRUNC]], ptr [[VAR_SROA_0]], align 4, !dbg [[DBG23]] +; CHECK-NEXT: [[VAR_SROA_1_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[REF_TMP_SROA_0_0_COPYLOAD]], 32, !dbg [[DBG23]] +; CHECK-NEXT: [[VAR_SROA_1_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[VAR_SROA_1_0_EXTRACT_SHIFT]] to i32, !dbg [[DBG23]] +; CHECK-NEXT: store i32 [[VAR_SROA_1_0_EXTRACT_TRUNC]], ptr [[VAR_SROA_1]], align 4, !dbg [[DBG23]], !DIAssignID [[DIASSIGNID24:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 [[VAR_SROA_1_0_EXTRACT_TRUNC]], metadata [[META14]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata [[DIASSIGNID24]], metadata ptr [[VAR_SROA_1]], metadata !DIExpression()), !dbg [[DBG20]] +; CHECK-NEXT: [[VAR_SROA_1_4_:%.*]] = load i32, ptr [[VAR_SROA_1]], align 4, !dbg [[DBG25:![0-9]+]] +; CHECK-NEXT: ret i32 [[VAR_SROA_1_4_]], !dbg [[DBG26:![0-9]+]] +; +entry: + %var = alloca %struct.Pair, align 4, !DIAssignID !19 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(), metadata !19, metadata ptr %var, metadata !DIExpression()), !dbg !20 + %ref.tmp = alloca %struct.Pair, align 4 + %call = call i64 @_Z6getVarv(), !dbg !22 + store i64 %call, ptr %ref.tmp, align 4, !dbg !22 + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %var, ptr align 4 %ref.tmp, i64 8, i1 false), !dbg !23, !DIAssignID !29 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata !29, metadata ptr %var, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata !29, metadata ptr %var, metadata !DIExpression(DW_OP_plus, DW_OP_constu, 4)), !dbg !20 + %b = getelementptr inbounds %struct.Pair, ptr %var, i32 0, i32 1, !dbg !31 + %0 = load i32, ptr %b, align 4, !dbg !31 + ret i32 %0, !dbg !35 +} + +declare !dbg !36 i64 @_Z6getVarv() +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 16.0.0"} +!9 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !1, file: !1, line: 3, type: !10, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14} +!14 = !DILocalVariable(name: "var", scope: !9, file: !1, line: 4, type: !15) +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !1, line: 1, size: 64, flags: DIFlagTypePassByValue, elements: !16, identifier: "_ZTS4Pair") +!16 = !{!17, !18} +!17 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !15, file: !1, line: 1, baseType: !12, size: 32) +!18 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !15, file: !1, line: 1, baseType: !12, size: 32, offset: 32) +!19 = distinct !DIAssignID() +!20 = !DILocation(line: 0, scope: !9) +!21 = !DILocation(line: 4, column: 3, scope: !9) +!22 = !DILocation(line: 5, column: 9, scope: !9) +!23 = !DILocation(line: 5, column: 7, scope: !9) +!29 = distinct !DIAssignID() +!30 = !DILocation(line: 5, column: 3, scope: !9) +!31 = !DILocation(line: 6, column: 14, scope: !9) +!34 = !DILocation(line: 7, column: 1, scope: !9) +!35 = !DILocation(line: 6, column: 3, scope: !9) +!36 = !DISubprogram(name: "getVar", linkageName: "_Z6getVarv", scope: !1, file: !1, line: 2, type: !37, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !39) +!37 = !DISubroutineType(types: !38) +!38 = !{!15} +!39 = !{} -- Gitee From 52f0037d657bf32e930b416c0552f63656edb967 Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Fri, 17 Oct 2025 17:19:43 +0100 Subject: [PATCH 2/2] [NFC][SROA][DebugInfo] Reuse existing dbg_assigns where possible (#163938) Addresses issue #145937 Without this patch SROA generates new dbg_assign for new stores. We can simply steal the existing dbg_assigns linked to the old store when the store is not being split. --- llvm/lib/Transforms/Scalar/SROA.cpp | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 877c07ac36e9..e144bb26e643 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -189,6 +189,12 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL) { + // If we want allocas to be migrated using this helper then we need to ensure + // that the BaseFragments map code still works. A simple solution would be + // to choose to always clone alloca dbg_assigns (rather than sometimes + // "stealing" them). + assert(!isa(Inst) && "Unexpected alloca"); + auto MarkerRange = at::getAssignmentMarkers(OldInst); // Nothing to do if OldInst has no linked dbg.assign intrinsics. if (MarkerRange.empty()) @@ -266,10 +272,21 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID); } - Value = Value ? Value : DbgAssign->getValue(); - auto *NewAssign = DIB.insertDbgAssign( + DbgAssignIntrinsic *NewAssign; + if (IsSplit) { + Value = Value ? Value : DbgAssign->getValue(); + NewAssign = DIB.insertDbgAssign( Inst, Value, DbgAssign->getVariable(), Expr, Dest, DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc()); + } else { + // The store is not split, simply steal the existing dbg_assign. + NewAssign = DbgAssign; + NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs? + NewAssign->setAddress(Dest); + if (Value) + NewAssign->replaceVariableLocationOp(0u, Value); + assert(Expr == NewAssign->getExpression()); + } // We could use more precision here at the cost of some additional (code) // complexity - if the original dbg.assign was adjacent to its store, we @@ -284,9 +301,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, // noted as slightly offset (in code) from the store. In practice this // should have little effect on the debugging experience due to the fact // that all the split stores should get the same line number. - NewAssign->moveBefore(DbgAssign); - - NewAssign->setDebugLoc(DbgAssign->getDebugLoc()); + if (NewAssign != DbgAssign) { + NewAssign->moveBefore(DbgAssign); + NewAssign->setDebugLoc(DbgAssign->getDebugLoc()); + } LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n"); } -- Gitee