From d50c1385822c24abc152f427d0ed5dc21dc6cca3 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Fri, 5 Sep 2025 16:21:22 +0800 Subject: [PATCH 1/2] [NFC] Fix no-plt test cases --- llvm/test/CodeGen/AArch64/fno-plt.c | 6 ++--- llvm/test/CodeGen/AArch64/fno-plt.cpp | 35 --------------------------- 2 files changed, 3 insertions(+), 38 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/fno-plt.cpp diff --git a/llvm/test/CodeGen/AArch64/fno-plt.c b/llvm/test/CodeGen/AArch64/fno-plt.c index 81ed912ce927..6cc59b683da2 100644 --- a/llvm/test/CodeGen/AArch64/fno-plt.c +++ b/llvm/test/CodeGen/AArch64/fno-plt.c @@ -1,8 +1,8 @@ -// RUN: clang %s -shared -fno-plt -O2 -fno-inline -fPIC -o noplt.so +// RUN: clang %s -shared -fno-plt -O2 -fno-inline -fPIC --target=aarch64-linux-gnu -fuse-ld=lld -nostdlib -o noplt.so // RUN: llvm-objdump -d noplt.so | FileCheck %s --check-prefix=CHECK-NO-PLT -// RUN: clang %s -shared -O2 -fno-inline -fPIC -o plt.so -// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT +// RUN: clang %s -shared -O2 -fno-inline -fPIC --target=aarch64-linux-gnu -fuse-ld=lld -nostdlib -o plt.so +// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT // CHECK-PLT: bar@plt // CHECK-PLT: bar1@plt diff --git a/llvm/test/CodeGen/AArch64/fno-plt.cpp b/llvm/test/CodeGen/AArch64/fno-plt.cpp deleted file mode 100644 index c5a1f2f24b37..000000000000 --- a/llvm/test/CodeGen/AArch64/fno-plt.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// RUN: clang -x c++ %s -shared -fno-plt -O2 -fno-inline -fPIC -o noplt.so -// RUN: llvm-objdump -d noplt.so | FileCheck %s --check-prefix=CHECK-NO-PLT - -// RUN: clang -x c++ %s -shared -O0 -fPIC -o plt.so -// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT - -// RUN: clang -x c++ %s -shared -O2 -fno-inline -fPIC -o plt.so -// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT - -// CHECK-PLT: bar@plt -// CHECK-PLT: bar1@plt -// CHECK-NO-PLT-NOT: bar@plt -// CHECK-NO-PLT-NOT: bar1@plt -// CHECK-NO-PLT-NOT: bar2@plt - -__attribute__((optnone)) -void bar(int a) { - return; -} - -__attribute__((optnone)) -extern void bar1(int); - -__attribute__((optnone)) -static void bar2(int a) { - return; -} - -void foo(int a) { - bar(a); - bar1(a); - bar2(a); - return; -} - -- Gitee From e03cba811c022300f4622d85a767ae89227e2031 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Mon, 12 May 2025 10:15:22 +0100 Subject: [PATCH 2/2] [LoopVersioningLICM] Only mark pointers with generated checks as noalias (#135168) Currently when we version a loop all loads and stores have the noalias metadata added to them. If there were some pointers that could not be analysed, and thus we could not generate runtime aliasing checks for, then we should not mark loads and stores using these pointers as noalias. This is done by getting rid of setNoAliasToLoop and instead using annotateLoopWithNoAlias, as that already correctly handles partial alias information. This does result in slightly different aliasing metadata being generated, but it looks like it's more precise. Currently this doesn't result in any change to the transforms that LoopVersioningLICM does, as LoopAccessAnalysis discards all results if it couldn't analyse every pointer leading to no loop versioning happening, but an upcoming patch will change that and we need this first otherwise we incorrectly mark some pointers as noalias even when they aren't. --- .../Transforms/Scalar/LoopVersioningLICM.cpp | 59 +--- .../load-from-unknown-address.ll | 307 ++++++++++++++++++ .../LoopVersioningLICM/loopversioningLICM1.ll | 8 +- .../LoopVersioningLICM/loopversioningLICM2.ll | 2 +- 4 files changed, 327 insertions(+), 49 deletions(-) create mode 100644 llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index e9d3a5a5cb79..7572adea9352 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -207,7 +207,6 @@ private: bool legalLoopInstructions(); bool legalLoopMemoryAccesses(); bool isLoopAlreadyVisited(); - void setNoAliasToLoop(Loop *VerLoop); bool instructionSafeForVersioning(Instruction *I); bool legalLoopVersioningOverlap(); }; @@ -383,6 +382,13 @@ bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { } LoadAndStoreCounter++; Value *Ptr = St->getPointerOperand(); + // Don't allow stores that we don't have runtime checks for, as we won't be + // able to mark them noalias meaning they would prevent any code motion. + auto &Pointers = LAI->getRuntimePointerChecking()->Pointers; + if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) { + LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n"); + return false; + } // Check loop invariant. if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) InvariantCounter++; @@ -400,6 +406,13 @@ bool LoopVersioningLICM::legalLoopInstructions() { InvariantCounter = 0; IsReadOnlyLoop = true; using namespace ore; + // Get LoopAccessInfo from current loop via the proxy. + LAI = &LAIs.getInfo(*CurLoop); + // Check LoopAccessInfo for need of runtime check. + if (LAI->getRuntimePointerChecking()->getChecks().empty()) { + LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); + return false; + } // Iterate over loop blocks and instructions of each block and check // instruction safety. for (auto *Block : CurLoop->getBlocks()) @@ -413,13 +426,6 @@ bool LoopVersioningLICM::legalLoopInstructions() { return false; } } - // Get LoopAccessInfo from current loop via the proxy. - LAI = &LAIs.getInfo(*CurLoop); - // Check LoopAccessInfo for need of runtime check. - if (LAI->getRuntimePointerChecking()->getChecks().empty()) { - LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); - return false; - } // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold if (LAI->getNumRuntimePointerChecks() > VectorizerParams::RuntimeMemoryCheckThreshold) { @@ -616,41 +622,6 @@ bool LoopVersioningLICM::isLegalForVersioning() { return true; } -/// Update loop with aggressive aliasing assumptions. -/// It marks no-alias to any pairs of memory operations by assuming -/// loop should not have any must-alias memory accesses pairs. -/// During LoopVersioningLICM legality we ignore loops having must -/// aliasing memory accesses. -void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { - // Get latch terminator instruction. - Instruction *I = VerLoop->getLoopLatch()->getTerminator(); - // Create alias scope domain. - MDBuilder MDB(I->getContext()); - MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); - StringRef Name = "LVAliasScope"; - MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); - SmallVector Scopes{NewScope}, NoAliases{NewScope}; - // Iterate over each instruction of loop. - // set no-alias for all load & store instructions. - for (auto *Block : CurLoop->getBlocks()) { - for (auto &Inst : *Block) { - // Only interested in instruction that may modify or read memory. - if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) - continue; - // Set no-alias for current instruction. - Inst.setMetadata( - LLVMContext::MD_noalias, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), - MDNode::get(Inst.getContext(), NoAliases))); - // set alias-scope for current instruction. - Inst.setMetadata( - LLVMContext::MD_alias_scope, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(Inst.getContext(), Scopes))); - } - } -} - bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipLoop(L)) return false; @@ -757,7 +728,7 @@ bool LoopVersioningLICM::run(DominatorTree *DT) { addStringMetadataToLoop(LVer.getVersionedLoop(), "llvm.mem.parallel_loop_access"); // Update version loop with aggressive aliasing assumption. - setNoAliasToLoop(LVer.getVersionedLoop()); + LVer.annotateLoopWithNoAlias(); Changed = true; } return Changed; diff --git a/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll new file mode 100644 index 000000000000..c0d9f062a99f --- /dev/null +++ b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -S -passes='function(loop-versioning-licm,loop-mssa(licm))' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + +; In these tests we have a loop where we can calculate the bounds of some memory +; accesses but not others. + +; Load from a gep whose bounds can't be calculated as the offset is loaded from memory +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval +define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_offset +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; As above but with a store to the loaded address. This should prevent the loop +; from being versioned, as we wouldn't be able to do any code motion. +define void @gep_loaded_offset_with_store(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_offset_with_store +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + store i32 0, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; Load from a gep whose bounds can't be calculated as the pointer is loaded from memory +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval +define void @gep_loaded_base(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_base +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load ptr, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[RVAL]], i64 0 +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load ptr, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %rval, i64 0 + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; Load from a gep with an offset that scalar evolution can't describe +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of qval +define void @gep_strange_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_strange_offset +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[Q]], align 4 +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[DEC]], 2 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[REM]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[IDXPROM]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VAL]], [[QVAL]] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %qval = load i32, ptr %q, align 4 + %rem = srem i32 %dec, 2 + %idxprom = sext i32 %rem to i64 + %arrayidx = getelementptr inbounds i32, ptr %r, i64 %idxprom + %val = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %val, %qval + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %add, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where the source address is loaded from a pointer +; FIXME: We should be able to hoist the load of the source address pointer +define void @memcpy_load_src(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_src +; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[DST_VAL:%.*]] = phi ptr [ [[DST_VAL_NEXT:%.*]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT]] = getelementptr inbounds i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dst_val = phi ptr [ %dst_val.next, %while.body ], [ %dst, %entry ] + %dec = add nsw i32 %n_val, -1 + %src_val = load ptr, ptr %src, align 8 + %src_val.next = getelementptr inbounds i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds i8, ptr %dst_val, i64 1 + store ptr %src_val.next, ptr %src, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where the destination address is loaded from a pointer +; FIXME: We could hoist the load of the destination address, but doing the +; bounds check of the store through that pointer itself requires using the +; hoisted load. +define void @memcpy_load_dst(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_dst +; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SRC_VAL:%.*]] = phi ptr [ [[SRC_VAL_NEXT:%.*]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT]] = getelementptr inbounds i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %src_val = phi ptr [ %src_val.next, %while.body ], [ %src, %entry ] + %dec = add nsw i32 %n_val, -1 + %dst_val = load ptr, ptr %dst, align 8 + %src_val.next = getelementptr inbounds i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds i8, ptr %dst_val, i64 1 + store ptr %dst_val.next, ptr %dst, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where both the source and destination pointers are loaded from pointers +; FIXME: We could hoist the loads of both addresses, but doing the bounds check +; of the store through the destination address itself requires using the hoisted +; load. +define void @memcpy_load_src_dst(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_src_dst +; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 +; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8 +; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n_val, -1 + %src_val = load ptr, ptr %src, align 8 + %dst_val = load ptr, ptr %dst, align 8 + %src_val.next = getelementptr inbounds i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds i8, ptr %dst_val, i64 1 + store ptr %src_val.next, ptr %src, align 8 + store ptr %dst_val.next, ptr %dst, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll index eec772c52bbb..8337a2d2c9c8 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -57,23 +57,23 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture ; CHECK-NEXT: [[CMP2_LVER_ORIG:%.*]] = icmp ult i32 [[INC_LVER_ORIG]], [[ITR]] ; CHECK-NEXT: br i1 [[CMP2_LVER_ORIG]], label [[FOR_BODY3_LVER_ORIG]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.body3.ph: -; CHECK-NEXT: [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope !2 ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[ADD86:%.*]] = phi i32 [ [[ARRAYIDX7_PROMOTED]], [[FOR_BODY3_PH]] ], [ [[ADD8:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[J_113:%.*]] = phi i32 [ [[J_016]], [[FOR_BODY3_PH]] ], [ [[INC:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[J_113]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[IDXPROM]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope !5, !noalias !2 ; CHECK-NEXT: [[ADD8]] = add nsw i32 [[ADD86]], [[ADD]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J_113]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[INC]], [[ITR]] -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT5:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT5:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.inc11.loopexit.loopexit: ; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT:%.*]] ; CHECK: for.inc11.loopexit.loopexit5: ; CHECK-NEXT: [[ADD8_LCSSA:%.*]] = phi i32 [ [[ADD8]], [[FOR_BODY3]] ] -; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope !2 ; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT]] ; CHECK: for.inc11.loopexit: ; CHECK-NEXT: br label [[FOR_INC11]] diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll index 22ca534be7ae..a31da2a212ea 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -9,7 +9,7 @@ ; ; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us ; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ] -; CHECK-NEXT: store float %add14.us.lcssa, ptr %arrayidx.us, align 4, !alias.scope !0, !noalias !0 +; CHECK-NEXT: store float %add14.us.lcssa, ptr %arrayidx.us, align 4, !alias.scope !3 ; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us ; define i32 @foo(ptr nocapture %var2, ptr nocapture readonly %var3, i32 %itr) #0 { -- Gitee