diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 8f4150b2539de2791900be41a208f4e38ab806ee..3313102492cb8dc41b484c650630d5f451992c8c 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -40,6 +40,7 @@ public: LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); + MCountName = "_mcount"; SuitableAlign = 128; WCharType = SignedInt; WIntType = UnsignedInt; diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp index 7483bf6d6d1e8e2f91ec2ca5da10a84662ffe031..63b9a1fdb988ceba4142c412bbbd1e0c777ac196 100644 --- a/clang/lib/CodeGen/Targets/LoongArch.cpp +++ b/clang/lib/CodeGen/Targets/LoongArch.cpp @@ -170,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( // copy constructor are not eligible for the FP calling convention. if (getRecordArgABI(Ty, CGT.getCXXABI())) return false; - if (isEmptyRecord(getContext(), Ty, true, true)) - return true; const RecordDecl *RD = RTy->getDecl(); - // Unions aren't eligible unless they're empty (which is caught above). + if (isEmptyRecord(getContext(), Ty, true, true) && + (!RD->isUnion() || !isa<CXXRecordDecl>(RD))) + return true; + // Unions aren't eligible unless they're empty in C (which is caught above). if (RD->isUnion()) return false; const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); @@ -308,12 +309,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, CGCXXABI::RAA_DirectInMemory); } - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) - return ABIArgInfo::getIgnore(); - uint64_t Size = getContext().getTypeSize(Ty); + // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or + // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but its size + // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. 
+ if (isEmptyRecord(getContext(), Ty, true) && Size == 0) + return ABIArgInfo::getIgnore(); + // Pass floating point values via FARs if possible. if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && FRLen >= Size && FARsLeft) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6b5930990f11713da647de740bd3436570b6c415..b21aeaee7f5ada77cf3a8765cdcdb0e2285f7107 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5632,10 +5632,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // defaults to -fno-direct-access-external-data. Pass the option if different // from the default. if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data, - options::OPT_fno_direct_access_external_data)) + options::OPT_fno_direct_access_external_data)) { if (A->getOption().matches(options::OPT_fdirect_access_external_data) != (PICLevel == 0)) A->render(Args, CmdArgs); + } else if (PICLevel == 0 && Triple.isLoongArch()) { + // Some targets default to -fno-direct-access-external-data even for + // -fno-pic. 
+ CmdArgs.push_back("-fno-direct-access-external-data"); + } if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) { CmdArgs.push_back("-fno-plt"); diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h index c5c533ee0b8c1d6e2372244fd8fc186e55a0409a..24dd29ce91ffb9a7f7982ad4b3310fc82d1568d2 100644 --- a/clang/lib/Headers/larchintrin.h +++ b/clang/lib/Headers/larchintrin.h @@ -156,7 +156,7 @@ extern __inline unsigned char return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); } -extern __inline unsigned char +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __iocsrrd_h(unsigned int _1) { return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c index fb90bf556c19b27ebb2b4f362c96b1b5c01bfe48..2f7596f0ebdc8beeeff104ecbbcab6b8d7db1fe6 100644 --- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c @@ -3,7 +3,7 @@ // RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ // RUN: FileCheck --check-prefix=CHECK-CXX %s -// Fields containing empty structs or unions are ignored when flattening +// Fields containing empty structs are ignored when flattening // structs to examine whether the structs can be passed via FARs, even in C++. // But there is an exception that non-zero-length array of empty structures are // not ignored in C++. These rules are not documented in psABI @@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) { return a; } +/// Note: The tests below check how empty structs themselves are passed, while +/// the tests above check that empty-struct fields are ignored when flattening a +/// container struct to decide whether it can be passed via FARs. 
+ // CHECK-C: define{{.*}} void @test_s9() // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) struct s9 { struct empty e; }; struct s9 test_s9(struct s9 a) { return a; } + +// CHECK-C: define{{.*}} void @test_s10() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) +struct s10 { }; +struct s10 test_s10(struct s10 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s11() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) +struct s11 { struct { } s; }; +struct s11 test_s11(struct s11 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s12() +// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() +struct s12 { int i[0]; }; +struct s12 test_s12(struct s12 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s13() +// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() +struct s13 { struct { } s[0]; }; +struct s13 test_s13(struct s13 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s14() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) +struct s14 { struct { } s[1]; }; +struct s14 test_s14(struct s14 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s15() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) +struct s15 { int : 0; }; +struct s15 test_s15(struct s15 a) { + return a; +} + +// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) +struct s16 { int : 1; }; +struct s16 test_s16(struct s16 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c new file mode 100644 index 0000000000000000000000000000000000000000..363e37efb64691ef182c9671bf741039dc7071a8 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ +// RUN: FileCheck --check-prefix=CHECK-C %s +// RUN: %clang_cc1 -triple loongarch64 
-target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ +// RUN: FileCheck --check-prefix=CHECK-CXX %s + +#include + +// CHECK-C: define{{.*}} void @test1() +// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) +union u1 { }; +union u1 test1(union u1 a) { + return a; +} + +struct s1 { + union u1 u; + int i; + float f; +}; + +// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) +// CHECK-CXX: define{{.*}} [2 x i64] @_Z5test22s1([2 x i64]{{[^,]*}}) +struct s1 test2(struct s1 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c index 93d54f511a9cd271695ec066c483ff87f0a03c09..eb3f8cbe7ac4cc252e715d09cc7e9847173f5035 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c @@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { // LA32-LABEL: @rdtime( // LA32-NEXT: entry: -// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 -// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 +// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] // LA32-NEXT: ret void // void rdtime() { @@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { __builtin_loongarch_movgr2fcsr(1, a); } -// CHECK-LABEL: @cacop_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) -// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) -// CHECK-NEXT: ret void +// LA32-LABEL: @cacop_w( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void 
@llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +// LA32-NEXT: ret void // void cacop_w(unsigned long int a) { __cacop_w(1, a, 1024); __builtin_loongarch_cacop_w(1, a, 1024); } + +// LA32-LABEL: @iocsrrd_h_result( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +// LA32-NEXT: ret i16 [[CONV3]] +// +unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return b+c; +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c index a740882eef5411cbb1940ce1538cbea12a672b2e..50ec358f546ec01ff1c08a3f9695ec701fca9ace 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c @@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { // CHECK-LABEL: @rdtime_d( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] // CHECK-NEXT: ret void // void rdtime_d() { @@ -396,8 +396,8 @@ void rdtime_d() { // CHECK-LABEL: @rdtime( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 -// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { 
i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] // CHECK-NEXT: ret void // void rdtime() { @@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { __movgr2fcsr(1, a); __builtin_loongarch_movgr2fcsr(1, a); } + +// CHECK-LABEL: @iocsrrd_h_result( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +// CHECK-NEXT: ret i16 [[CONV3]] +// +unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return b+c; +} diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c index 8f994ab4e75443a64d2c2336f9fbcaf4750916b1..bdd609c1dfc58262b2e38ccaf6b032cf199636fd 100644 --- a/clang/test/CodeGen/mcount.c +++ b/clang/test/CodeGen/mcount.c @@ -7,6 +7,8 @@ // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s +// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck 
-check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s diff --git a/clang/test/Driver/fdirect-access-external-data.c b/clang/test/Driver/fdirect-access-external-data.c index f132b1b088af35dc4e11657ed2f4c3fff41989b9..a6da776e69777421d4b221f092cc7f26fecc7a39 100644 --- a/clang/test/Driver/fdirect-access-external-data.c +++ b/clang/test/Driver/fdirect-access-external-data.c @@ -9,6 +9,12 @@ // RUN: %clang -### -c -target aarch64 %s -fpic 2>&1 | FileCheck %s --check-prefix=DEFAULT // RUN: %clang -### -c -target aarch64 %s -fpic -fdirect-access-external-data 2>&1 | FileCheck %s --check-prefix=DIRECT +/// loongarch* targets default to -fno-direct-access-external-data even for -fno-pic. +// RUN: %clang -### -c --target=loongarch64 -fno-pic %s 2>&1 | FileCheck %s --check-prefix=INDIRECT +// RUN: %clang -### -c --target=loongarch64 -fpie %s 2>&1 | FileCheck %s --check-prefix=DEFAULT +// RUN: %clang -### -c --target=loongarch32 -fno-pic -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DEFAULT +// RUN: %clang -### -c --target=loongarch32 -fpie -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DIRECT + // DEFAULT-NOT: direct-access-external-data" // DIRECT: "-fdirect-access-external-data" // INDIRECT: "-fno-direct-access-external-data" diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 826db54febd382fa052970680e98778ac0592589..65120c083f498dc85ada7fef08e66a199200838a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -294,8 +294,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)), def : Pat<(fma_nsz (fneg FPR32:$fj), 
FPR32:$fk, (fneg FPR32:$fa)), (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; -// fnmsub.s: -fj * fk + fa -def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), +// fnmsub.s: -(fj * fk - fa) +def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))), + (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; + +// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA) +def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa), (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>; } // Predicates = [HasBasicF] diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 5118474725b6c8f311b7ad55b2480f6b69cf5314..437c1e4d7be272525a47db4dcb9b3a560f5fb0cc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -256,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)), (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; // fnmsub.d: -(fj * fk - fa) -def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), +def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))), + (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; + +// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA) +def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa), (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>; } // Predicates = [HasBasicD] diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index f7eacd56c542f127737c2e5f8c457ebb7f35903f..5affaf37ad5a19ebe3f9ec124986c319b171f647 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, // Set libcalls. setLibcallName(RTLIB::MUL_I128, nullptr); + // The MULO libcall is not part of libgcc, only compiler-rt. 
+ setLibcallName(RTLIB::MULO_I64, nullptr); } + // The MULO libcall is not part of libgcc, only compiler-rt. + setLibcallName(RTLIB::MULO_I128, nullptr); + static const ISD::CondCode FPCCToExpand[] = { ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE, ISD::SETGE, ISD::SETNE, ISD::SETGT}; @@ -2305,7 +2310,9 @@ Retry: return DAG.getNode( LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), - DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) + : (MaskIdx0 + MaskLen0 - 1), + DL, GRLenVT), DAG.getConstant(MaskIdx0, DL, GRLenVT)); } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index b2c4bb812ba5e9e22634ab7dbe33570d84a810db..05ae36a9781d93b63277349dd24ebd596cf0b5e1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -586,6 +586,7 @@ class Br_I26 op> : FmtI26 { let isBranch = 1; let isTerminator = 1; + let isBarrier = 1; } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 @@ -1857,9 +1858,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; /// Intrinsics def : Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), - (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; + (CACOP timm:$op, GPR:$rj, timm:$imm12)>; def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), - (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; + (CACOP timm:$op, GPR:$rj, timm:$imm12)>; def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; @@ -2023,9 +2024,9 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), (ASRTGT_D GPR:$rj, GPR:$rk)>; def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), - (LDDIR GPR:$rj, uimm8:$imm8)>; 
+ (LDDIR GPR:$rj, timm:$imm8)>; def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), - (LDPTE GPR:$rj, uimm8:$imm8)>; + (LDPTE GPR:$rj, timm:$imm8)>; } // Predicates = [IsLA64] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 08c0820cb86291200039506e77573c615f0a9db7..09d92ac9aa3a868da94f7c0a0e6027adef33c3d3 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -263,6 +263,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, FixupKind = LoongArch::fixup_loongarch_b21; break; case LoongArch::B: + case LoongArch::BL: FixupKind = LoongArch::fixup_loongarch_b26; break; } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index 942e667bc2618728e2ddcdcc22ae47d0fed315d9..a4e6a09863e6a68746e72453237365664e8b354c 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -97,13 +97,90 @@ public: bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const override { unsigned NumOps = Inst.getNumOperands(); - if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { + if ((isBranch(Inst) && !isIndirectBranch(Inst)) || + Inst.getOpcode() == LoongArch::BL) { Target = Addr + Inst.getOperand(NumOps - 1).getImm(); return true; } return false; } + + bool isTerminator(const MCInst &Inst) const override { + if (MCInstrAnalysis::isTerminator(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0; + } + } + + bool isCall(const MCInst &Inst) const override 
{ + if (MCInstrAnalysis::isCall(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() != LoongArch::R0; + } + } + + bool isReturn(const MCInst &Inst) const override { + if (MCInstrAnalysis::isReturn(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() == LoongArch::R1; + } + } + + bool isBranch(const MCInst &Inst) const override { + if (MCInstrAnalysis::isBranch(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() != LoongArch::R1; + } + } + + bool isUnconditionalBranch(const MCInst &Inst) const override { + if (MCInstrAnalysis::isUnconditionalBranch(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() != LoongArch::R1; + } + } + + bool isIndirectBranch(const MCInst &Inst) const override { + if (MCInstrAnalysis::isIndirectBranch(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() != LoongArch::R1; + } + } }; } // end namespace diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 81309280a44bfa1972e14cea23e9ef135ce0c9db..d11dc605e18833cf38ee537882cb30d0b149c994 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1462,7 +1462,8 @@ StringRef sys::getHostCPUName() { // Use processor id to detect cpu name. 
uint32_t processor_id; __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id)); - switch (processor_id & 0xff00) { + // Refer to PRID_SERIES_MASK in the Linux kernel: arch/loongarch/include/asm/cpu.h. + switch (processor_id & 0xf000) { case 0xc000: // Loongson 64bit, 4-issue return "la464"; // TODO: Others. diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll index dfbe000841cdcbdf5481ddce19a2d6a6987dfcb6..e008caacad2a17adeea14932cac0249c7cc58ba0 100644 --- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll +++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll @@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { ret i32 %or } +;; The high bits of `const` are zero. +define i32 @pat5_high_zeros(i32 %a) nounwind { +; CHECK-LABEL: pat5_high_zeros: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $a1, 1 +; CHECK-NEXT: ori $a1, $a1, 564 +; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 +; CHECK-NEXT: ret + %and = and i32 %a, 65535 ; 0x0000ffff + %or = or i32 %and, 305397760 ; 0x12340000 + ret i32 %or +} + ;; Pattern 6: a = b | ((c & mask) << shamt) ;; In this testcase b is 0x10000002, but in fact we do not require b being a ;; constant. 
As long as all positions in b to be overwritten by the incoming diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll index 6dd62847943375325952d10159ca8468f864ff04..58d20c62a668aec4c0bbfac075aa5166416f38c1 100644 --- a/llvm/test/CodeGen/LoongArch/double-fma.ll +++ b/llvm/test/CodeGen/LoongArch/double-fma.ll @@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; LA32-CONTRACT-ON-LABEL: fnmsub_d: ; LA32-CONTRACT-ON: # %bb.0: ; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ; LA32-CONTRACT-ON-NEXT: ret ; ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: ; LA32-CONTRACT-OFF: # %bb.0: ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ; LA32-CONTRACT-OFF-NEXT: ret ; ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: @@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-ON-LABEL: fnmsub_d: ; LA64-CONTRACT-ON: # %bb.0: ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ; LA64-CONTRACT-ON-NEXT: ret ; ; LA64-CONTRACT-OFF-LABEL: fnmsub_d: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg double %c + %mul = fmul double %a, %b + %add = fadd double %mul, %negc + %neg = fneg double %add + ret double %neg +} + +define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; 
LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg nsz double %a + %mul = fmul nsz double %nega, %b + %add = fadd nsz double %mul, %c + ret double %add +} + +;; Check that fnmsub.d is not emitted. 
+define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg double %a @@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg contract double %c + %mul = fmul contract double %a, %b + %add = fadd contract double %mul, %negc + %neg = fneg contract double %add + ret double %neg +} + +define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, 
$fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract nsz double %a + %mul = fmul contract nsz double %nega, %b + %add = fadd contract nsz double %mul, %c + ret double %add +} + +;; Check that fnmsub.d is not emitted. 
+define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg contract double %a %mul = fmul contract double %nega, %b @@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %fma = call double @llvm.fma.f64(double %a, double %b, double %c) - %neg = fneg double %fma - ret double %neg + %negfma = fneg double %fma + ret double %negfma } define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { @@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { ; 
LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg double %c + %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) + %negfma = fneg double %fma + ret double %negfma +} + +define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg double %a - %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) + %fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) ret double %fma } -define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { -; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: +;; Check that fnmsub.d is not emitted. 
+define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: ; LA32-CONTRACT-FAST: # %bb.0: -; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-FAST-NEXT: ret ; -; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: ; LA32-CONTRACT-ON: # %bb.0: -; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-ON-NEXT: ret ; -; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: ; LA32-CONTRACT-OFF: # %bb.0: -; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-OFF-NEXT: ret ; -; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: ; LA64-CONTRACT-FAST: # %bb.0: -; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-FAST-NEXT: ret ; -; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: ; LA64-CONTRACT-ON: # %bb.0: -; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-ON-NEXT: ret ; -; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: -; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; 
LA64-CONTRACT-OFF-NEXT: ret - %negb = fneg double %b - %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) + %nega = fneg double %a + %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) ret double %fma } @@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %mul = fmul contract double %a, %b - %sub = fsub contract double %c, %mul - ret double %sub + %negc = fneg contract double %c + %add = fadd contract double %negc, %mul + %negadd = fneg contract double %add + ret double %negadd } diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll index 54dc56784006f1270a1d3003e4740fa27cf63b6f..c236255d971a208bba1db9d6c9733d889eaa4027 100644 --- a/llvm/test/CodeGen/LoongArch/float-fma.ll +++ b/llvm/test/CodeGen/LoongArch/float-fma.ll @@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; LA32-CONTRACT-ON-LABEL: fnmsub_s: ; LA32-CONTRACT-ON: # %bb.0: ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ; LA32-CONTRACT-ON-NEXT: ret ; ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: ; LA32-CONTRACT-OFF: # %bb.0: ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ; LA32-CONTRACT-OFF-NEXT: ret ; ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: @@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-ON-LABEL: fnmsub_s: ; LA64-CONTRACT-ON: # %bb.0: ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ; 
LA64-CONTRACT-ON-NEXT: ret ; ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg float %c + %mul = fmul float %a, %b + %add = fadd float %mul, %negc + %neg = fneg float %add + ret float %neg +} + +define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg nsz float %a + %mul = fmul nsz float %nega, %b + %add = fadd nsz float %mul, %c + ret float %add +} + +;; Check that fnmsub.s is not emitted. 
+define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg float %a @@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg contract float %c + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %negc + %neg = fneg contract float %add + ret float %neg +} + +define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; 
LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract nsz float %a + %mul = fmul contract nsz float %nega, %b + %add = fadd contract nsz float %mul, %c + ret float %add +} + +;; Check that fnmsub.s is not emitted. 
+define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg contract float %a %mul = fmul contract float %nega, %b @@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %fma = call float @llvm.fma.f64(float %a, float %b, float %c) - %neg = fneg float %fma - ret float %neg + %negfma = fneg float %fma + ret float %negfma } define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { @@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: 
fnmsub_s_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg float %c + %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) + %negfma = fneg float %fma + ret float %negfma +} + +define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg float %a - %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) + %fma = call nsz float @llvm.fma.f64(float %nega, float %b, float %c) ret float %fma } -define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { -; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: +;; Check that fnmsub.s is not emitted. 
+define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: ; LA32-CONTRACT-FAST: # %bb.0: -; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-FAST-NEXT: ret ; -; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: ; LA32-CONTRACT-ON: # %bb.0: -; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-ON-NEXT: ret ; -; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: ; LA32-CONTRACT-OFF: # %bb.0: -; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-OFF-NEXT: ret ; -; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: ; LA64-CONTRACT-FAST: # %bb.0: -; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-FAST-NEXT: ret ; -; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: ; LA64-CONTRACT-ON: # %bb.0: -; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-ON-NEXT: ret ; -; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: -; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; 
LA64-CONTRACT-OFF-NEXT: ret - %negb = fneg float %b - %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) + %nega = fneg float %a + %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) ret float %fma } @@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %mul = fmul contract float %a, %b - %sub = fsub contract float %c, %mul - ret float %sub + %negc = fneg contract float %c + %add = fadd contract float %negc, %mul + %negadd = fneg contract float %add + ret float %negadd } diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll new file mode 100644 index 0000000000000000000000000000000000000000..6cba4108d63c6f8ec5875b2bec8b73a608a1d6d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll @@ -0,0 +1,439 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { +; LA32-LABEL: smuloi64: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a5, $a1, 31 +; LA32-NEXT: mul.w $a6, $a2, $a5 +; LA32-NEXT: mulh.wu $a7, $a2, $a5 +; LA32-NEXT: add.w $a7, $a7, $a6 +; LA32-NEXT: mul.w $a5, $a3, $a5 +; LA32-NEXT: add.w $a5, $a7, $a5 +; LA32-NEXT: srai.w $a7, $a3, 31 +; LA32-NEXT: mul.w $t0, $a7, $a1 +; LA32-NEXT: mulh.wu $t1, $a7, $a0 +; LA32-NEXT: add.w $t0, $t1, $t0 +; LA32-NEXT: mul.w $a7, $a7, $a0 +; LA32-NEXT: add.w $t0, $t0, $a7 +; LA32-NEXT: add.w $a5, $t0, $a5 +; LA32-NEXT: mulh.wu $t0, $a0, $a2 +; LA32-NEXT: mul.w $t1, $a1, $a2 +; LA32-NEXT: add.w $t0, $t1, $t0 +; LA32-NEXT: sltu $t1, $t0, $t1 +; LA32-NEXT: mulh.wu $t2, $a1, $a2 +; LA32-NEXT: add.w $t1, $t2, $t1 +; LA32-NEXT: mul.w $t2, $a0, $a3 +; LA32-NEXT: add.w $t0, $t2, $t0 +; 
LA32-NEXT: sltu $t2, $t0, $t2 +; LA32-NEXT: mulh.wu $t3, $a0, $a3 +; LA32-NEXT: add.w $t2, $t3, $t2 +; LA32-NEXT: add.w $a6, $a7, $a6 +; LA32-NEXT: sltu $a7, $a6, $a7 +; LA32-NEXT: add.w $a5, $a5, $a7 +; LA32-NEXT: mul.w $a0, $a0, $a2 +; LA32-NEXT: mul.w $a2, $a1, $a3 +; LA32-NEXT: mulh.wu $a1, $a1, $a3 +; LA32-NEXT: add.w $a3, $t1, $t2 +; LA32-NEXT: sltu $a7, $a3, $t1 +; LA32-NEXT: add.w $a1, $a1, $a7 +; LA32-NEXT: st.w $a0, $a4, 0 +; LA32-NEXT: add.w $a0, $a2, $a3 +; LA32-NEXT: sltu $a2, $a0, $a2 +; LA32-NEXT: add.w $a1, $a1, $a2 +; LA32-NEXT: st.w $t0, $a4, 4 +; LA32-NEXT: add.w $a1, $a1, $a5 +; LA32-NEXT: add.w $a2, $a0, $a6 +; LA32-NEXT: sltu $a0, $a2, $a0 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: srai.w $a1, $t0, 31 +; LA32-NEXT: xor $a0, $a0, $a1 +; LA32-NEXT: xor $a1, $a2, $a1 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: sltu $a0, $zero, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: smuloi64: +; LA64: # %bb.0: +; LA64-NEXT: mul.d $a3, $a0, $a1 +; LA64-NEXT: st.d $a3, $a2, 0 +; LA64-NEXT: mulh.d $a0, $a0, $a1 +; LA64-NEXT: srai.d $a1, $a3, 63 +; LA64-NEXT: xor $a0, $a0, $a1 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: ret + %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, ptr %res + ret i1 %obit +} + +define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { +; LA32-LABEL: smuloi128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -96 +; LA32-NEXT: .cfi_def_cfa_offset 96 +; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill +; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill +; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte 
Folded Spill +; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: .cfi_offset 23, -12 +; LA32-NEXT: .cfi_offset 24, -16 +; LA32-NEXT: .cfi_offset 25, -20 +; LA32-NEXT: .cfi_offset 26, -24 +; LA32-NEXT: .cfi_offset 27, -28 +; LA32-NEXT: .cfi_offset 28, -32 +; LA32-NEXT: .cfi_offset 29, -36 +; LA32-NEXT: .cfi_offset 30, -40 +; LA32-NEXT: .cfi_offset 31, -44 +; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ld.w $a6, $a1, 0 +; LA32-NEXT: ld.w $a7, $a0, 0 +; LA32-NEXT: mulh.wu $a3, $a7, $a6 +; LA32-NEXT: ld.w $a5, $a0, 4 +; LA32-NEXT: mul.w $a4, $a5, $a6 +; LA32-NEXT: add.w $a3, $a4, $a3 +; LA32-NEXT: sltu $a4, $a3, $a4 +; LA32-NEXT: mulh.wu $t0, $a5, $a6 +; LA32-NEXT: add.w $a4, $t0, $a4 +; LA32-NEXT: ld.w $t0, $a1, 4 +; LA32-NEXT: mul.w $t1, $a7, $t0 +; LA32-NEXT: add.w $a3, $t1, $a3 +; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: sltu $t1, $a3, $t1 +; LA32-NEXT: mulh.wu $t2, $a7, $t0 +; LA32-NEXT: add.w $t1, $t2, $t1 +; LA32-NEXT: ld.w $t4, $a0, 12 +; LA32-NEXT: ld.w $t2, $a0, 8 +; LA32-NEXT: ld.w $t3, $a1, 8 +; LA32-NEXT: mulh.wu $a0, $t2, $t3 +; LA32-NEXT: mul.w $t5, $t4, $t3 +; LA32-NEXT: add.w $a0, $t5, $a0 +; LA32-NEXT: sltu $t5, $a0, $t5 +; LA32-NEXT: mulh.wu $t6, $t4, $t3 +; LA32-NEXT: add.w $t5, $t6, $t5 +; LA32-NEXT: ld.w $t7, $a1, 12 +; LA32-NEXT: mul.w $a1, $t2, $t7 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill +; LA32-NEXT: sltu $a1, $a0, $a1 +; LA32-NEXT: mulh.wu $t6, $t2, $t7 +; LA32-NEXT: add.w $t6, $t6, $a1 +; LA32-NEXT: srai.w $s7, $t4, 31 +; LA32-NEXT: mul.w $a1, $s7, $t7 +; LA32-NEXT: mulh.wu $t8, $s7, $t3 +; LA32-NEXT: add.w $t8, $t8, $a1 +; LA32-NEXT: mulh.wu $fp, $a6, $s7 +; LA32-NEXT: mul.w $s6, $t0, $s7 +; LA32-NEXT: add.w $s8, $s6, $fp +; LA32-NEXT: mul.w $a1, $a6, $s7 +; LA32-NEXT: add.w $ra, $a1, $s8 +; LA32-NEXT: sltu $s0, $ra, 
$a1 +; LA32-NEXT: add.w $a0, $fp, $s0 +; LA32-NEXT: add.w $a3, $a4, $t1 +; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: sltu $a4, $a3, $a4 +; LA32-NEXT: mulh.wu $t1, $a5, $t0 +; LA32-NEXT: add.w $a3, $t1, $a4 +; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: srai.w $s4, $t7, 31 +; LA32-NEXT: mul.w $fp, $a7, $s4 +; LA32-NEXT: mulh.wu $a4, $a7, $s4 +; LA32-NEXT: add.w $s1, $a4, $fp +; LA32-NEXT: sltu $s0, $s1, $fp +; LA32-NEXT: add.w $s5, $a4, $s0 +; LA32-NEXT: mul.w $a4, $s7, $t3 +; LA32-NEXT: add.w $t8, $t8, $a4 +; LA32-NEXT: add.w $s0, $ra, $t8 +; LA32-NEXT: add.w $a3, $a1, $a4 +; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: sltu $a4, $a3, $a1 +; LA32-NEXT: add.w $a3, $s0, $a4 +; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: add.w $s3, $t5, $t6 +; LA32-NEXT: sltu $a4, $s3, $t5 +; LA32-NEXT: mulh.wu $t5, $t4, $t7 +; LA32-NEXT: add.w $a3, $t5, $a4 +; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill +; LA32-NEXT: mul.w $a4, $a7, $a6 +; LA32-NEXT: st.w $a4, $a2, 0 +; LA32-NEXT: sltu $a4, $s8, $s6 +; LA32-NEXT: mulh.wu $t5, $t0, $s7 +; LA32-NEXT: add.w $a4, $t5, $a4 +; LA32-NEXT: add.w $t1, $a4, $a0 +; LA32-NEXT: sltu $a4, $t1, $a4 +; LA32-NEXT: add.w $s2, $t5, $a4 +; LA32-NEXT: mulh.wu $a4, $a7, $t3 +; LA32-NEXT: mul.w $t5, $a5, $t3 +; LA32-NEXT: add.w $a4, $t5, $a4 +; LA32-NEXT: sltu $t5, $a4, $t5 +; LA32-NEXT: mulh.wu $t6, $a5, $t3 +; LA32-NEXT: add.w $a3, $t6, $t5 +; LA32-NEXT: mul.w $t6, $a7, $t7 +; LA32-NEXT: add.w $t5, $t6, $a4 +; LA32-NEXT: sltu $a4, $t5, $t6 +; LA32-NEXT: mulh.wu $t6, $a7, $t7 +; LA32-NEXT: add.w $a4, $t6, $a4 +; LA32-NEXT: mulh.wu $t6, $t2, $a6 +; LA32-NEXT: mul.w $s7, $t4, $a6 +; LA32-NEXT: add.w $t6, $s7, $t6 +; LA32-NEXT: sltu $s7, $t6, $s7 +; LA32-NEXT: mulh.wu $s8, $t4, $a6 +; LA32-NEXT: add.w $a0, $s8, $s7 +; LA32-NEXT: mul.w $s7, $t2, $t0 +; LA32-NEXT: add.w $t6, $s7, $t6 +; LA32-NEXT: sltu $s7, $t6, $s7 +; LA32-NEXT: mulh.wu $s8, $t2, $t0 +; LA32-NEXT: 
add.w $a2, $s8, $s7 +; LA32-NEXT: mul.w $s8, $a5, $s4 +; LA32-NEXT: add.w $s7, $s1, $s8 +; LA32-NEXT: add.w $s1, $s7, $ra +; LA32-NEXT: add.w $a1, $fp, $a1 +; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: sltu $ra, $a1, $fp +; LA32-NEXT: add.w $a1, $s1, $ra +; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: xor $s0, $a1, $s7 +; LA32-NEXT: sltui $s0, $s0, 1 +; LA32-NEXT: sltu $a1, $a1, $s7 +; LA32-NEXT: masknez $s1, $a1, $s0 +; LA32-NEXT: maskeqz $s0, $ra, $s0 +; LA32-NEXT: add.w $t1, $s6, $t1 +; LA32-NEXT: sltu $s6, $t1, $s6 +; LA32-NEXT: add.w $s2, $s2, $s6 +; LA32-NEXT: add.w $a2, $a0, $a2 +; LA32-NEXT: sltu $a0, $a2, $a0 +; LA32-NEXT: mulh.wu $s6, $t4, $t0 +; LA32-NEXT: add.w $t8, $s6, $a0 +; LA32-NEXT: add.w $a4, $a3, $a4 +; LA32-NEXT: sltu $a3, $a4, $a3 +; LA32-NEXT: mulh.wu $s6, $a5, $t7 +; LA32-NEXT: add.w $a3, $s6, $a3 +; LA32-NEXT: mul.w $s6, $t4, $t7 +; LA32-NEXT: mul.w $t7, $a5, $t7 +; LA32-NEXT: mul.w $ra, $t4, $t0 +; LA32-NEXT: mul.w $t0, $a5, $t0 +; LA32-NEXT: mul.w $t4, $t4, $s4 +; LA32-NEXT: mul.w $a7, $a7, $t3 +; LA32-NEXT: mul.w $a6, $t2, $a6 +; LA32-NEXT: mul.w $t3, $t2, $t3 +; LA32-NEXT: mul.w $a0, $t2, $s4 +; LA32-NEXT: mulh.wu $t2, $t2, $s4 +; LA32-NEXT: mulh.wu $a5, $s4, $a5 +; LA32-NEXT: sltu $s4, $s7, $s8 +; LA32-NEXT: add.w $s4, $a5, $s4 +; LA32-NEXT: add.w $s4, $s5, $s4 +; LA32-NEXT: sltu $s5, $s4, $s5 +; LA32-NEXT: add.w $s5, $a5, $s5 +; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: add.w $a1, $t0, $a1 +; LA32-NEXT: sltu $a5, $a1, $t0 +; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: add.w $t0, $t0, $a5 +; LA32-NEXT: or $s0, $s0, $s1 +; LA32-NEXT: add.w $a4, $t7, $a4 +; LA32-NEXT: sltu $a5, $a4, $t7 +; LA32-NEXT: add.w $t7, $a3, $a5 +; LA32-NEXT: add.w $s1, $ra, $a2 +; LA32-NEXT: sltu $a2, $s1, $ra +; LA32-NEXT: add.w $t8, $t8, $a2 +; LA32-NEXT: add.w $a5, $s6, $s3 +; LA32-NEXT: sltu $a2, $a5, $s6 +; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload +; 
LA32-NEXT: add.w $a2, $a3, $a2 +; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: st.w $a3, $s6, 4 +; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: add.w $a3, $s2, $a3 +; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: add.w $s2, $t1, $s2 +; LA32-NEXT: sltu $t1, $s2, $t1 +; LA32-NEXT: add.w $a3, $a3, $t1 +; LA32-NEXT: add.w $t1, $s8, $s4 +; LA32-NEXT: sltu $s3, $t1, $s8 +; LA32-NEXT: add.w $s3, $s5, $s3 +; LA32-NEXT: add.w $t2, $t2, $a0 +; LA32-NEXT: add.w $t2, $t2, $t4 +; LA32-NEXT: add.w $t2, $t2, $s7 +; LA32-NEXT: add.w $t4, $a0, $fp +; LA32-NEXT: sltu $a0, $t4, $a0 +; LA32-NEXT: add.w $a0, $t2, $a0 +; LA32-NEXT: add.w $a0, $s3, $a0 +; LA32-NEXT: add.w $t2, $t1, $t4 +; LA32-NEXT: sltu $t1, $t2, $t1 +; LA32-NEXT: add.w $a0, $a0, $t1 +; LA32-NEXT: add.w $a0, $a0, $a3 +; LA32-NEXT: add.w $t1, $t2, $s2 +; LA32-NEXT: sltu $a3, $t1, $t2 +; LA32-NEXT: add.w $a0, $a0, $a3 +; LA32-NEXT: add.w $a3, $t6, $t0 +; LA32-NEXT: add.w $a1, $a6, $a1 +; LA32-NEXT: sltu $a6, $a1, $a6 +; LA32-NEXT: add.w $t0, $a3, $a6 +; LA32-NEXT: add.w $a1, $a7, $a1 +; LA32-NEXT: sltu $a7, $a1, $a7 +; LA32-NEXT: add.w $a3, $t5, $t0 +; LA32-NEXT: add.w $a3, $a3, $a7 +; LA32-NEXT: sltu $t2, $a3, $t5 +; LA32-NEXT: xor $t4, $a3, $t5 +; LA32-NEXT: sltui $t4, $t4, 1 +; LA32-NEXT: masknez $t2, $t2, $t4 +; LA32-NEXT: maskeqz $a7, $a7, $t4 +; LA32-NEXT: st.w $a1, $s6, 8 +; LA32-NEXT: or $a1, $a7, $t2 +; LA32-NEXT: sltu $a7, $t0, $t6 +; LA32-NEXT: xor $t0, $t0, $t6 +; LA32-NEXT: sltui $t0, $t0, 1 +; LA32-NEXT: masknez $a7, $a7, $t0 +; LA32-NEXT: maskeqz $a6, $a6, $t0 +; LA32-NEXT: or $a6, $a6, $a7 +; LA32-NEXT: add.w $a6, $s1, $a6 +; LA32-NEXT: sltu $a7, $a6, $s1 +; LA32-NEXT: add.w $a7, $t8, $a7 +; LA32-NEXT: add.w $a1, $a4, $a1 +; LA32-NEXT: sltu $a4, $a1, $a4 +; LA32-NEXT: add.w $a4, $t7, $a4 +; LA32-NEXT: add.w $t0, $t1, $s0 +; LA32-NEXT: sltu $t1, $t0, $t1 +; LA32-NEXT: add.w $a0, $a0, $t1 +; 
LA32-NEXT: st.w $a3, $s6, 12 +; LA32-NEXT: add.w $a1, $a6, $a1 +; LA32-NEXT: sltu $a6, $a1, $a6 +; LA32-NEXT: add.w $a4, $a7, $a4 +; LA32-NEXT: add.w $a4, $a4, $a6 +; LA32-NEXT: sltu $t1, $a4, $a7 +; LA32-NEXT: xor $a7, $a4, $a7 +; LA32-NEXT: sltui $a7, $a7, 1 +; LA32-NEXT: masknez $t1, $t1, $a7 +; LA32-NEXT: maskeqz $a6, $a6, $a7 +; LA32-NEXT: or $a6, $a6, $t1 +; LA32-NEXT: add.w $a6, $a5, $a6 +; LA32-NEXT: sltu $a5, $a6, $a5 +; LA32-NEXT: add.w $a2, $a2, $a5 +; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload +; LA32-NEXT: add.w $a4, $t1, $a4 +; LA32-NEXT: add.w $a1, $t3, $a1 +; LA32-NEXT: sltu $a5, $a1, $t3 +; LA32-NEXT: add.w $a4, $a4, $a5 +; LA32-NEXT: sltu $a7, $a4, $t1 +; LA32-NEXT: xor $t1, $a4, $t1 +; LA32-NEXT: sltui $t1, $t1, 1 +; LA32-NEXT: masknez $a7, $a7, $t1 +; LA32-NEXT: maskeqz $a5, $a5, $t1 +; LA32-NEXT: or $a5, $a5, $a7 +; LA32-NEXT: add.w $a5, $a6, $a5 +; LA32-NEXT: sltu $a6, $a5, $a6 +; LA32-NEXT: add.w $a2, $a2, $a6 +; LA32-NEXT: add.w $a0, $a2, $a0 +; LA32-NEXT: add.w $a2, $a5, $t0 +; LA32-NEXT: sltu $a5, $a2, $a5 +; LA32-NEXT: add.w $a0, $a0, $a5 +; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: add.w $a5, $a1, $a5 +; LA32-NEXT: sltu $a1, $a5, $a1 +; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: add.w $a6, $a4, $a6 +; LA32-NEXT: add.w $a6, $a6, $a1 +; LA32-NEXT: sltu $a7, $a6, $a4 +; LA32-NEXT: xor $a4, $a6, $a4 +; LA32-NEXT: sltui $a4, $a4, 1 +; LA32-NEXT: masknez $a7, $a7, $a4 +; LA32-NEXT: maskeqz $a1, $a1, $a4 +; LA32-NEXT: or $a1, $a1, $a7 +; LA32-NEXT: add.w $a1, $a2, $a1 +; LA32-NEXT: sltu $a2, $a1, $a2 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: srai.w $a2, $a3, 31 +; LA32-NEXT: xor $a3, $a6, $a2 +; LA32-NEXT: xor $a0, $a0, $a2 +; LA32-NEXT: or $a0, $a3, $a0 +; LA32-NEXT: xor $a3, $a5, $a2 +; LA32-NEXT: xor $a1, $a1, $a2 +; LA32-NEXT: or $a1, $a3, $a1 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: sltu $a0, $zero, $a0 +; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: 
ld.w $s7, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 96 +; LA32-NEXT: ret +; +; LA64-LABEL: smuloi128: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a5, $a1, 63 +; LA64-NEXT: mul.d $a6, $a2, $a5 +; LA64-NEXT: mulh.du $a7, $a2, $a5 +; LA64-NEXT: add.d $a7, $a7, $a6 +; LA64-NEXT: mul.d $a5, $a3, $a5 +; LA64-NEXT: add.d $a5, $a7, $a5 +; LA64-NEXT: srai.d $a7, $a3, 63 +; LA64-NEXT: mul.d $t0, $a7, $a1 +; LA64-NEXT: mulh.du $t1, $a7, $a0 +; LA64-NEXT: add.d $t0, $t1, $t0 +; LA64-NEXT: mul.d $a7, $a7, $a0 +; LA64-NEXT: add.d $t0, $t0, $a7 +; LA64-NEXT: add.d $a5, $t0, $a5 +; LA64-NEXT: mulh.du $t0, $a0, $a2 +; LA64-NEXT: mul.d $t1, $a1, $a2 +; LA64-NEXT: add.d $t0, $t1, $t0 +; LA64-NEXT: sltu $t1, $t0, $t1 +; LA64-NEXT: mulh.du $t2, $a1, $a2 +; LA64-NEXT: add.d $t1, $t2, $t1 +; LA64-NEXT: mul.d $t2, $a0, $a3 +; LA64-NEXT: add.d $t0, $t2, $t0 +; LA64-NEXT: sltu $t2, $t0, $t2 +; LA64-NEXT: mulh.du $t3, $a0, $a3 +; LA64-NEXT: add.d $t2, $t3, $t2 +; LA64-NEXT: add.d $a6, $a7, $a6 +; LA64-NEXT: sltu $a7, $a6, $a7 +; LA64-NEXT: add.d $a5, $a5, $a7 +; LA64-NEXT: mul.d $a0, $a0, $a2 +; LA64-NEXT: mul.d $a2, $a1, $a3 +; LA64-NEXT: mulh.du $a1, $a1, $a3 +; LA64-NEXT: add.d $a3, $t1, $t2 +; LA64-NEXT: sltu $a7, $a3, $t1 +; LA64-NEXT: add.d $a1, $a1, $a7 +; LA64-NEXT: st.d $a0, $a4, 0 +; LA64-NEXT: add.d $a0, $a2, $a3 +; LA64-NEXT: sltu $a2, $a0, $a2 +; LA64-NEXT: add.d $a1, $a1, $a2 +; LA64-NEXT: st.d $t0, $a4, 8 +; LA64-NEXT: add.d $a1, $a1, $a5 +; LA64-NEXT: add.d $a2, $a0, $a6 +; 
LA64-NEXT: sltu $a0, $a2, $a0 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: srai.d $a1, $t0, 63 +; LA64-NEXT: xor $a0, $a0, $a1 +; LA64-NEXT: xor $a1, $a2, $a1 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: ret + %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) + %val = extractvalue {i128, i1} %t, 0 + %obit = extractvalue {i128, i1} %t, 1 + store i128 %val, ptr %res + ret i1 %obit +} + +declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone +declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir new file mode 100644 index 0000000000000000000000000000000000000000..70cd5fb8d7eb690dbc0edc2f83ba65209f39c04f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir @@ -0,0 +1,62 @@ +# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ +# RUN: llvm-objdump -d - | FileCheck %s + +# REQUIRES: asserts + +## Check that bl can get fixupkind correctly, whether BL contains +## target-flags(loongarch-call) or not. + +--- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "loongarch64" + + define dso_local void @test_bl_fixupkind_with_flag() { + ; CHECK-LABEL: test_bl_fixupkind_with_flag + ; CHECK: addi.d $sp, $sp, -16 + ; CHECK-NEXT: st.d $ra, $sp, 8 + ; CHECK-NEXT: bl 0 + ; CHECK-NEXT: ld.d $ra, $sp, 8 + ; CHECK-NEXT: addi.d $sp, $sp, 16 + ; CHECK-NEXT: ret + entry: + call void @foo() + ret void + } + + define dso_local void @test_bl_fixupkind_without_flag() { + ; CHECK-LABEL: test_bl_fixupkind_without_flag + ; CHECK: addi.d $sp, $sp, -16 + ; CHECK-NEXT: st.d $ra, $sp, 8 + ; CHECK-NEXT: bl 0 + ; CHECK-NEXT: ld.d $ra, $sp, 8 + ; CHECK-NEXT: addi.d $sp, $sp, 16 + ; CHECK-NEXT: ret + entry: + call void @foo() + ret void + } + + declare dso_local void @foo(...) +... 
+--- +name: test_bl_fixupkind_with_flag +tracksRegLiveness: true +body: | + bb.0.entry: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 + BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 + ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 + PseudoRET + +... +--- +name: test_bl_fixupkind_without_flag +tracksRegLiveness: true +body: | + bb.0.entry: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 + BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 + ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 + PseudoRET + +... diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s new file mode 100644 index 0000000000000000000000000000000000000000..8cb00aef9954272ca2b95b7fff4dd8819ba064c4 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s @@ -0,0 +1,76 @@ +# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | \ +# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ +# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s + +# CHECK-LABEL: <foo>: +foo: +# CHECK: beq $a0, $a1, 108 +beq $a0, $a1, .Llocal +# CHECK: bne $a0, $a1, 104 +bne $a0, $a1, .Llocal +# CHECK: blt $a0, $a1, 100 +blt $a0, $a1, .Llocal +# CHECK: bltu $a0, $a1, 96 +bltu $a0, $a1, .Llocal +# CHECK: bge $a0, $a1, 92 +bge $a0, $a1, .Llocal +# CHECK: bgeu $a0, $a1, 88 +bgeu $a0, $a1, .Llocal +# CHECK: beqz $a0, 84 +beqz $a0, .Llocal +# CHECK: bnez $a0, 80 +bnez $a0, .Llocal +# CHECK: bceqz $fcc6, 76 +bceqz $fcc6, .Llocal +# CHECK: bcnez $fcc6, 72 +bcnez $fcc6, .Llocal + +# CHECK: beq $a0, $a1, 76 +beq $a0, $a1, bar +# CHECK: bne $a0, $a1, 72 +bne $a0, $a1, bar +# CHECK: blt $a0, $a1, 68 +blt $a0, $a1, bar +# CHECK: bltu $a0, $a1, 64 +bltu $a0, $a1, bar +# CHECK: bge $a0, $a1, 60 +bge $a0, $a1, bar +# CHECK: bgeu $a0, $a1, 
56 +bgeu $a0, $a1, bar +# CHECK: beqz $a0, 52 +beqz $a0, bar +# CHECK: bnez $a0, 48 +bnez $a0, bar +# CHECK: bceqz $fcc6, 44 +bceqz $fcc6, bar +# CHECK: bcnez $fcc6, 40 +bcnez $fcc6, bar + +# CHECK: b 28 +b .Llocal +# CHECK: b 32 +b bar + +# CHECK: bl 20 +bl .Llocal +# CHECK: bl 24 +bl bar + +# CHECK: jirl $zero, $a0, 4{{$}} +jirl $zero, $a0, 4 +# CHECK: jirl $ra, $a0, 4{{$}} +jirl $ra, $a0, 4 +# CHECK: ret +ret + +.Llocal: +# CHECK: 6c: nop +# CHECK: nop +nop +nop + +# CHECK-LABEL: <bar>: +bar: +# CHECK: 74: nop +nop diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cc24278acbb414ab5be93cffabda76082cc18a3a --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not "LoongArch" in config.root.targets: + config.unsupported = True diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt index fef4f8e154618347c666c29c2cd58cacde3ee74d..e6f8ec073721f6ee8e5a09876f5c56f8375dc609 100644 --- a/llvm/unittests/Target/LoongArch/CMakeLists.txt +++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt @@ -20,6 +20,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_target_unittest(LoongArchTests InstSizes.cpp + MCInstrAnalysisTest.cpp ) set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..468ee79615d643c953255a8cc7f0450087a4b218 --- /dev/null +++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp @@ -0,0 +1,121 @@ +//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInstrAnalysis.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +#include "gtest/gtest.h" + +#include + +using namespace llvm; + +namespace { + +class InstrAnalysisTest : public testing::TestWithParam { +protected: + std::unique_ptr Info; + std::unique_ptr Analysis; + + static void SetUpTestSuite() { + LLVMInitializeLoongArchTargetInfo(); + LLVMInitializeLoongArchTarget(); + LLVMInitializeLoongArchTargetMC(); + } + + InstrAnalysisTest() { + std::string Error; + const Target *TheTarget = + TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); + Info = std::unique_ptr(TheTarget->createMCInstrInfo()); + Analysis = std::unique_ptr( + TheTarget->createMCInstrAnalysis(Info.get())); + } +}; + +} // namespace + +static MCInst beq() { + return MCInstBuilder(LoongArch::BEQ) + .addReg(LoongArch::R0) + .addReg(LoongArch::R1) + .addImm(32); +} + +static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } + +static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } + +static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { + return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); +} + +TEST_P(InstrAnalysisTest, IsTerminator) { + EXPECT_TRUE(Analysis->isTerminator(beq())); + EXPECT_TRUE(Analysis->isTerminator(b())); + EXPECT_FALSE(Analysis->isTerminator(bl())); + EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); +} + +TEST_P(InstrAnalysisTest, IsCall) { + EXPECT_FALSE(Analysis->isCall(beq())); + EXPECT_FALSE(Analysis->isCall(b())); + EXPECT_TRUE(Analysis->isCall(bl())); + 
EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); +} + +TEST_P(InstrAnalysisTest, IsReturn) { + EXPECT_FALSE(Analysis->isReturn(beq())); + EXPECT_FALSE(Analysis->isReturn(b())); + EXPECT_FALSE(Analysis->isReturn(bl())); + EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); +} + +TEST_P(InstrAnalysisTest, IsBranch) { + EXPECT_TRUE(Analysis->isBranch(beq())); + EXPECT_TRUE(Analysis->isBranch(b())); + EXPECT_FALSE(Analysis->isBranch(bl())); + EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); +} + +TEST_P(InstrAnalysisTest, IsConditionalBranch) { + EXPECT_TRUE(Analysis->isConditionalBranch(beq())); + EXPECT_FALSE(Analysis->isConditionalBranch(b())); + EXPECT_FALSE(Analysis->isConditionalBranch(bl())); +} + +TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); + EXPECT_FALSE( + Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); +} + +TEST_P(InstrAnalysisTest, IsIndirectBranch) { + EXPECT_FALSE(Analysis->isIndirectBranch(beq())); + EXPECT_FALSE(Analysis->isIndirectBranch(b())); + EXPECT_FALSE(Analysis->isIndirectBranch(bl())); + EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); +} + +INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, + testing::Values("loongarch32", "loongarch64"));