diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 65d1c4e2d6515..1a1f25cc7a90c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -802,6 +802,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::BSWAP, VT, Expand); } + if (!Subtarget->isThumb1Only() && !Subtarget->hasMVEIntegerOps()) + setOperationAction(ISD::SCMP, MVT::i32, Custom); + + if (!Subtarget->hasMVEIntegerOps()) + setOperationAction(ISD::UCMP, MVT::i32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -1628,6 +1634,10 @@ bool ARMTargetLowering::useSoftFloat() const { return Subtarget->useSoftFloat(); } +bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const { + return (!Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32); +} + // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, @@ -10614,6 +10624,181 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op, return DAG.getBitcast(MVT::i32, Res); } +SDValue ARMTargetLowering::LowerSCMP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + // For the ARM assembly pattern: + // subs r0, r0, r1 ; subtract RHS from LHS and set flags + // movgt r0, #1 ; if LHS > RHS, set result to 1 + // mvnlt r0, #0 ; if LHS < RHS, set result to -1 (mvn #0 = -1) + // ; if LHS == RHS, result remains 0 from the subs + + // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC + // Check if RHS is (0 - something), and if so use ADDC with LHS + something + SDValue SubResult, Flags; + bool CanUseAdd = false; + SDValue AddOperand; + + // Check if RHS is a subtraction against 0: (0 - X) + if (RHS.getOpcode() == ISD::SUB) { + SDValue SubLHS = RHS.getOperand(0); + SDValue SubRHS = RHS.getOperand(1); + + // Check if it's 0 - X + if (isNullConstant(SubLHS)) { + // For SCMP: only if X is known to never be INT_MIN (to avoid overflow) + if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS) + .getSignedMinValue() + .isMinSignedValue()) { + CanUseAdd = true; + AddOperand = SubRHS; // Replace RHS with X, so we do LHS + X instead of + // LHS - (0 - X) + } + } + } + + if (CanUseAdd) { + // Use ADDC: LHS + AddOperand (where RHS was 0 - AddOperand) + SDValue AddWithFlags = DAG.getNode( + ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand); + SubResult = AddWithFlags.getValue(0); // The addition result + Flags = AddWithFlags.getValue(1); // The flags from ADDS + } else { + // Use ARMISD::SUBC to generate SUBS instruction (subtract with flags) + SDValue SubWithFlags = DAG.getNode( + ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + SubResult = SubWithFlags.getValue(0); // The subtraction result + Flags = SubWithFlags.getValue(1); // The flags from SUBS + } + + // Constants for conditional moves + SDValue One = DAG.getConstant(1, dl, MVT::i32); + SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32); + + // movgt: if greater than, set to 1 + SDValue GTCond = DAG.getConstant(ARMCC::GT, dl, MVT::i32); + SDValue Result1 = + DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, One, GTCond, Flags); + + // mvnlt: if less than, set to -1 (equivalent to mvn #0) + SDValue LTCond = 
DAG.getConstant(ARMCC::LT, dl, MVT::i32); + SDValue Result2 = + DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, LTCond, Flags); + + if (Op.getValueType() != MVT::i32) + Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType()); + + return Result2; +} + +SDValue ARMTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + if (Subtarget->isThumb1Only()) { + // For Thumb unsigned comparison, use this sequence: + // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags + // sbc r2, r2 ; r2 = r2 - r2 - !carry + // cmp r1, r0 ; compare RHS with LHS + // sbc r1, r1 ; r1 = r1 - r1 - !carry + // subs r0, r2, r1 ; r0 = r2 - r1 (final result) + + // First subtraction: LHS - RHS + SDValue Sub1WithFlags = DAG.getNode( + ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + SDValue Sub1Result = Sub1WithFlags.getValue(0); + SDValue Flags1 = Sub1WithFlags.getValue(1); + + // SUBE: Sub1Result - Sub1Result - !carry + // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned) + SDValue Sbc1 = + DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), + Sub1Result, Sub1Result, Flags1); + SDValue Sbc1Result = Sbc1.getValue(0); + + // Second comparison: RHS vs LHS (reverse comparison) + SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS); + + // SUBE: RHS - RHS - !carry + // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned) + SDValue Sbc2 = DAG.getNode( + ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags); + SDValue Sbc2Result = Sbc2.getValue(0); + + // Final subtraction: Sbc1Result - Sbc2Result (no flags needed) + SDValue Result = + DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result); + if (Op.getValueType() != MVT::i32) + Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType()); + + return Result; + } + + // For the ARM assembly pattern (unsigned version): + // subs r0, r0, r1 ; subtract RHS from LHS and set flags + // movhi r0, #1 ; if LHS > RHS (unsigned), set result to 1 + // mvnlo r0, #0 ; if LHS < RHS (unsigned), set result to -1 + // ; if LHS == RHS, result remains 0 from the subs + + // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC + // Check if RHS is (0 - something), and if so use ADDC with LHS + something + SDValue SubResult, Flags; + bool CanUseAdd = false; + SDValue AddOperand; + + // Check if RHS is a subtraction against 0: (0 - X) + if (RHS.getOpcode() == ISD::SUB) { + SDValue SubLHS = RHS.getOperand(0); + SDValue SubRHS = RHS.getOperand(1); + + // Check if it's 0 - X + if (isNullConstant(SubLHS)) { + // For UCMP: only if X is known to never be zero + if (DAG.isKnownNeverZero(SubRHS)) { + CanUseAdd = true; + AddOperand = SubRHS; // Replace RHS with X, so we do LHS + X instead of + // LHS - (0 - X) + } + } + } + + if (CanUseAdd) { + // Use ADDC: LHS + AddOperand (where RHS was 0 - AddOperand) + SDValue AddWithFlags = DAG.getNode( + ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand); + SubResult = AddWithFlags.getValue(0); // The addition result + Flags = AddWithFlags.getValue(1); // The flags from ADDS + } else { + // Use ARMISD::SUBC to generate SUBS instruction (subtract with flags) + SDValue SubWithFlags = DAG.getNode( + ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS); + SubResult = SubWithFlags.getValue(0); // The subtraction result + Flags = SubWithFlags.getValue(1); // The flags from SUBS + } + + // Constants for conditional moves + SDValue One 
= DAG.getConstant(1, dl, MVT::i32); + SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32); + + // movhi: if higher (unsigned greater than), set to 1 + SDValue HICond = DAG.getConstant(ARMCC::HI, dl, MVT::i32); + SDValue Result1 = + DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, One, HICond, Flags); + + // mvnlo: if lower (unsigned less than), set to -1 + SDValue LOCond = DAG.getConstant(ARMCC::LO, dl, MVT::i32); + SDValue Result2 = + DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, LOCond, Flags); + + if (Op.getValueType() != MVT::i32) + Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType()); + + return Result2; +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -10742,6 +10927,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_BF16: return LowerFP_TO_BF16(Op, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); + case ISD::SCMP: + return LowerSCMP(Op, DAG); + case ISD::UCMP: + return LowerUCMP(Op, DAG); } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 5f4aef55b22c9..f3b132da7b946 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -607,6 +607,8 @@ class VectorType; bool preferZeroCompareBranch() const override { return true; } + bool shouldExpandCmpUsingSelects(EVT VT) const override; + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; bool hasAndNotCompare(SDValue V) const override { @@ -903,6 +905,8 @@ class VectorType; void LowerLOAD(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/test/CodeGen/ARM/scmp.ll b/llvm/test/CodeGen/ARM/scmp.ll index 6e493c993751c..9189aee6aaf43 100644 --- a/llvm/test/CodeGen/ARM/scmp.ll +++ b/llvm/test/CodeGen/ARM/scmp.ll @@ -4,12 +4,9 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind { ; CHECK-LABEL: scmp_8_8: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: movwgt r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: mvnlt r0, #0 ; CHECK-NEXT: bx lr %1 = call i8 @llvm.scmp(i8 %x, i8 %y) ret i8 %1 @@ -18,12 +15,9 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind { define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind { ; CHECK-LABEL: scmp_8_16: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: movwgt r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: mvnlt r0, #0 ; CHECK-NEXT: bx lr %1 = call i8 @llvm.scmp(i16 %x, i16 %y) ret i8 %1 @@ -32,12 +26,9 @@ define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind { define i8 @scmp_8_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scmp_8_32: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: movwgt r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwgt 
r0, #1 +; CHECK-NEXT: mvnlt r0, #0 ; CHECK-NEXT: bx lr %1 = call i8 @llvm.scmp(i32 %x, i32 %y) ret i8 %1 @@ -92,17 +83,26 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind { define i32 @scmp_32_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scmp_32_32: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: movwgt r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: mvnlt r0, #0 ; CHECK-NEXT: bx lr %1 = call i32 @llvm.scmp(i32 %x, i32 %y) ret i32 %1 } +define i32 @scmp_neg(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: scmp_neg: +; CHECK: @ %bb.0: +; CHECK-NEXT: adds r0, r0, r1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: mvnlt r0, #0 +; CHECK-NEXT: bx lr + %yy = sub nsw i32 0, %y + %1 = call i32 @llvm.scmp(i32 %x, i32 %yy) + ret i32 %1 +} + define i32 @scmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scmp_32_64: ; CHECK: @ %bb.0: diff --git a/llvm/test/CodeGen/ARM/ucmp.ll b/llvm/test/CodeGen/ARM/ucmp.ll index ad4af534ee8fe..bb0201454d1ea 100644 --- a/llvm/test/CodeGen/ARM/ucmp.ll +++ b/llvm/test/CodeGen/ARM/ucmp.ll @@ -4,12 +4,9 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_8: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlo r0, #1 -; CHECK-NEXT: movwhi r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: mvnlo r0, #0 ; CHECK-NEXT: bx lr %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 @@ -18,12 +15,9 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_16: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlo r0, #1 -; CHECK-NEXT: movwhi r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: mvnlo r0, #0 ; CHECK-NEXT: bx lr %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 @@ -32,12 +26,9 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_8_32: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlo r0, #1 -; CHECK-NEXT: movwhi r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: mvnlo r0, #0 ; CHECK-NEXT: bx lr %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 @@ -92,12 +83,9 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_32_32: ; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlo r0, #1 -; CHECK-NEXT: movwhi r2, #1 -; CHECK-NEXT: sub r0, r2, r0 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: mvnlo r0, #0 ; CHECK-NEXT: bx lr %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 diff --git a/llvm/test/CodeGen/Thumb/scmp.ll b/llvm/test/CodeGen/Thumb/scmp.ll index 661dbe97cdb3c..d74109347c369 100644 --- a/llvm/test/CodeGen/Thumb/scmp.ll +++ b/llvm/test/CodeGen/Thumb/scmp.ll @@ -1,50 +1,127 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck 
%s -check-prefix=THUMB1 +; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s -check-prefix=THUMB2 +; RUN: llc -mtriple thumbv8.1m.main-none-eabi -mattr=+mve,+lob -o - %s | FileCheck %s --check-prefix=V81M define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind { -; CHECK-LABEL: scmp_8_8: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_8_8: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: movs r2, #1 +; THUMB1-NEXT: movs r3, #0 +; THUMB1-NEXT: cmp r0, r1 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: bge .LBB0_3 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: ble .LBB0_4 +; THUMB1-NEXT: .LBB0_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; THUMB1-NEXT: .LBB0_3: +; THUMB1-NEXT: mov r0, r3 +; THUMB1-NEXT: bgt .LBB0_2 +; THUMB1-NEXT: .LBB0_4: +; THUMB1-NEXT: mov r2, r3 +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: scmp_8_8: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it gt +; THUMB2-NEXT: movgt r0, #1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_8_8: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, gt +; V81M-NEXT: it lt +; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i8 %x, i8 %y) ret i8 %1 } define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind { -; CHECK-LABEL: scmp_8_16: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_8_16: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: movs r2, #1 +; THUMB1-NEXT: movs r3, #0 +; THUMB1-NEXT: cmp r0, r1 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: bge .LBB1_3 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: ble .LBB1_4 +; THUMB1-NEXT: .LBB1_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; THUMB1-NEXT: .LBB1_3: +; THUMB1-NEXT: mov r0, r3 +; THUMB1-NEXT: bgt .LBB1_2 +; THUMB1-NEXT: .LBB1_4: +; THUMB1-NEXT: mov r2, r3 +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: scmp_8_16: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it gt +; THUMB2-NEXT: movgt r0, #1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_8_16: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, gt +; V81M-NEXT: it lt +; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i16 %x, i16 %y) ret i8 %1 } define i8 @scmp_8_32(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scmp_8_32: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_8_32: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: movs r2, #1 +; THUMB1-NEXT: movs r3, #0 +; THUMB1-NEXT: cmp r0, r1 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: bge .LBB2_3 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: ble .LBB2_4 +; THUMB1-NEXT: .LBB2_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; THUMB1-NEXT: .LBB2_3: +; THUMB1-NEXT: mov r0, r3 +; THUMB1-NEXT: bgt .LBB2_2 +; THUMB1-NEXT: .LBB2_4: +; THUMB1-NEXT: mov r2, r3 +; THUMB1-NEXT: 
subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: scmp_8_32: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it gt +; THUMB2-NEXT: movgt r0, #1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_8_32: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, gt +; V81M-NEXT: it lt +; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i32 %x, i32 %y) ret i8 %1 } @@ -52,18 +129,54 @@ define i8 @scmp_8_32(i32 %x, i32 %y) nounwind { define i8 @scmp_8_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scmp_8_64: ; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r0, r2 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r12, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs.w r0, r3, r1 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #1 -; CHECK-NEXT: sub.w r0, r9, r12 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_8_64: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, lr} +; THUMB1-NEXT: push {r4, r5, r6, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: movs r5, #0 +; THUMB1-NEXT: subs r6, r0, r2 +; THUMB1-NEXT: mov r6, r1 +; THUMB1-NEXT: sbcs r6, r3 +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: blt .LBB3_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: .LBB3_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: sbcs r3, r1 +; THUMB1-NEXT: blt .LBB3_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: mov r4, r5 +; THUMB1-NEXT: .LBB3_4: +; THUMB1-NEXT: subs r0, r4, r6 +; THUMB1-NEXT: pop {r4, r5, r6, pc} +; +; THUMB2-LABEL: scmp_8_64: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: mov.w r9, #0 +; THUMB2-NEXT: sbcs.w r12, r1, r3 +; THUMB2-NEXT: mov.w r12, #0 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r12, #1 +; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: sbcs.w r0, r3, r1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r9, #1 +; THUMB2-NEXT: sub.w r0, r9, r12 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_8_64: +; V81M: @ %bb.0: +; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: sbcs.w r12, r1, r3 +; V81M-NEXT: cset r12, lt +; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: sbcs.w r0, r3, r1 +; V81M-NEXT: cset r0, lt +; V81M-NEXT: sub.w r0, r0, r12 +; V81M-NEXT: bx lr %1 = call i8 @llvm.scmp(i64 %x, i64 %y) ret i8 %1 } @@ -91,22 +204,132 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind { ; CHECK-NEXT: movlt r5, #1 ; CHECK-NEXT: subs r0, r5, r6 ; CHECK-NEXT: pop {r4, r5, r6, pc} +; THUMB1-LABEL: scmp_8_128: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #20 +; THUMB1-NEXT: sub sp, #20 +; THUMB1-NEXT: str r3, [sp, #16] @ 4-byte Spill +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: str r3, [sp] @ 4-byte Spill +; THUMB1-NEXT: movs r3, #0 +; THUMB1-NEXT: str r3, [sp, #12] @ 4-byte Spill +; THUMB1-NEXT: ldr r6, [sp, #52] +; THUMB1-NEXT: add r7, sp, #40 +; THUMB1-NEXT: ldm r7, {r3, r5, r7} +; THUMB1-NEXT: subs r4, r0, r3 +; THUMB1-NEXT: str r1, [sp, #4] @ 4-byte Spill +; THUMB1-NEXT: mov r4, r1 +; THUMB1-NEXT: ldr r1, [sp] @ 4-byte Reload +; THUMB1-NEXT: sbcs r4, r5 +; THUMB1-NEXT: str r2, [sp, #8] @ 4-byte Spill +; THUMB1-NEXT: mov r4, r2 +; THUMB1-NEXT: sbcs r4, r7 +; THUMB1-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; THUMB1-NEXT: sbcs r4, r6 +; THUMB1-NEXT: mov r2, r1 +; THUMB1-NEXT: blt .LBB4_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; THUMB1-NEXT: .LBB4_2: +; THUMB1-NEXT: subs r0, r3, r0 +; 
THUMB1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; THUMB1-NEXT: sbcs r5, r0 +; THUMB1-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; THUMB1-NEXT: sbcs r7, r0 +; THUMB1-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; THUMB1-NEXT: sbcs r6, r0 +; THUMB1-NEXT: blt .LBB4_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; THUMB1-NEXT: .LBB4_4: +; THUMB1-NEXT: subs r0, r1, r2 +; THUMB1-NEXT: add sp, #20 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: scmp_8_128: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: push {r4, r5, r6, lr} +; THUMB2-NEXT: add.w lr, sp, #16 +; THUMB2-NEXT: ldr r4, [sp, #28] +; THUMB2-NEXT: movs r5, #0 +; THUMB2-NEXT: ldm.w lr, {r9, r12, lr} +; THUMB2-NEXT: subs.w r6, r0, r9 +; THUMB2-NEXT: sbcs.w r6, r1, r12 +; THUMB2-NEXT: sbcs.w r6, r2, lr +; THUMB2-NEXT: sbcs.w r6, r3, r4 +; THUMB2-NEXT: mov.w r6, #0 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt r6, #1 +; THUMB2-NEXT: subs.w r0, r9, r0 +; THUMB2-NEXT: sbcs.w r0, r12, r1 +; THUMB2-NEXT: sbcs.w r0, lr, r2 +; THUMB2-NEXT: sbcs.w r0, r4, r3 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt r5, #1 +; THUMB2-NEXT: subs r0, r5, r6 +; THUMB2-NEXT: pop {r4, r5, r6, pc} +; +; V81M-LABEL: scmp_8_128: +; V81M: @ %bb.0: +; V81M-NEXT: .save {r4, r5, r6, lr} +; V81M-NEXT: push {r4, r5, r6, lr} +; V81M-NEXT: ldrd r5, r4, [sp, #16] +; V81M-NEXT: ldrd lr, r12, [sp, #24] +; V81M-NEXT: subs r6, r0, r5 +; V81M-NEXT: sbcs.w r6, r1, r4 +; V81M-NEXT: sbcs.w r6, r2, lr +; V81M-NEXT: sbcs.w r6, r3, r12 +; V81M-NEXT: cset r6, lt +; V81M-NEXT: subs r0, r5, r0 +; V81M-NEXT: sbcs.w r0, r4, r1 +; V81M-NEXT: sbcs.w r0, lr, r2 +; V81M-NEXT: sbcs.w r0, r12, r3 +; V81M-NEXT: cset r0, lt +; V81M-NEXT: subs r0, r0, r6 +; V81M-NEXT: pop {r4, r5, r6, pc} %1 = call i8 @llvm.scmp(i128 %x, i128 %y) ret i8 %1 } define i32 @scmp_32_32(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: scmp_32_32: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_32_32: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: movs r2, #1 +; THUMB1-NEXT: movs r3, #0 +; THUMB1-NEXT: cmp r0, r1 +; THUMB1-NEXT: mov r0, r2 +; THUMB1-NEXT: bge .LBB5_3 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: ble .LBB5_4 +; THUMB1-NEXT: .LBB5_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; THUMB1-NEXT: .LBB5_3: +; THUMB1-NEXT: mov r0, r3 +; THUMB1-NEXT: bgt .LBB5_2 +; THUMB1-NEXT: .LBB5_4: +; THUMB1-NEXT: mov r2, r3 +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: scmp_32_32: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it gt +; THUMB2-NEXT: movgt r0, #1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_32_32: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, gt +; V81M-NEXT: it lt +; V81M-NEXT: movlt.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i32 @llvm.scmp(i32 %x, i32 %y) ret i32 %1 } @@ -126,6 +349,54 @@ define i32 @scmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: movlt.w r9, #1 ; CHECK-NEXT: sub.w r0, r9, r12 ; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_32_64: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, lr} +; THUMB1-NEXT: push {r4, r5, r6, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: movs r5, #0 +; THUMB1-NEXT: subs r6, r0, r2 +; THUMB1-NEXT: mov r6, r1 +; THUMB1-NEXT: sbcs r6, r3 +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: blt .LBB6_2 +; THUMB1-NEXT: @ %bb.1: +; 
THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: .LBB6_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: sbcs r3, r1 +; THUMB1-NEXT: blt .LBB6_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: mov r4, r5 +; THUMB1-NEXT: .LBB6_4: +; THUMB1-NEXT: subs r0, r4, r6 +; THUMB1-NEXT: pop {r4, r5, r6, pc} +; +; THUMB2-LABEL: scmp_32_64: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: mov.w r9, #0 +; THUMB2-NEXT: sbcs.w r12, r1, r3 +; THUMB2-NEXT: mov.w r12, #0 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r12, #1 +; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: sbcs.w r0, r3, r1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r9, #1 +; THUMB2-NEXT: sub.w r0, r9, r12 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_32_64: +; V81M: @ %bb.0: +; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: sbcs.w r12, r1, r3 +; V81M-NEXT: cset r12, lt +; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: sbcs.w r0, r3, r1 +; V81M-NEXT: cset r0, lt +; V81M-NEXT: sub.w r0, r0, r12 +; V81M-NEXT: bx lr %1 = call i32 @llvm.scmp(i64 %x, i64 %y) ret i32 %1 } @@ -146,6 +417,57 @@ define i64 @scmp_64_64(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: sub.w r0, r9, r12 ; CHECK-NEXT: asrs r1, r0, #31 ; CHECK-NEXT: bx lr +; THUMB1-LABEL: scmp_64_64: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, lr} +; THUMB1-NEXT: push {r4, r5, r6, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: movs r5, #0 +; THUMB1-NEXT: subs r6, r0, r2 +; THUMB1-NEXT: mov r6, r1 +; THUMB1-NEXT: sbcs r6, r3 +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: blt .LBB7_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: .LBB7_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: sbcs r3, r1 +; THUMB1-NEXT: blt .LBB7_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: mov r4, r5 +; THUMB1-NEXT: .LBB7_4: +; THUMB1-NEXT: subs r0, r4, r6 +; THUMB1-NEXT: asrs r1, r0, #31 +; THUMB1-NEXT: pop {r4, r5, r6, pc} +; +; THUMB2-LABEL: scmp_64_64: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: mov.w r9, #0 +; THUMB2-NEXT: sbcs.w r12, r1, r3 +; THUMB2-NEXT: mov.w r12, #0 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r12, #1 +; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: sbcs.w r0, r3, r1 +; THUMB2-NEXT: it lt +; THUMB2-NEXT: movlt.w r9, #1 +; THUMB2-NEXT: sub.w r0, r9, r12 +; THUMB2-NEXT: asrs r1, r0, #31 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: scmp_64_64: +; V81M: @ %bb.0: +; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: sbcs.w r12, r1, r3 +; V81M-NEXT: cset r12, lt +; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: sbcs.w r0, r3, r1 +; V81M-NEXT: cset r0, lt +; V81M-NEXT: sub.w r0, r0, r12 +; V81M-NEXT: asrs r1, r0, #31 +; V81M-NEXT: bx lr %1 = call i64 @llvm.scmp(i64 %x, i64 %y) ret i64 %1 } diff --git a/llvm/test/CodeGen/Thumb/ucmp.ll b/llvm/test/CodeGen/Thumb/ucmp.ll index 7e6d0a323b11c..cce15afaeb3ab 100644 --- a/llvm/test/CodeGen/Thumb/ucmp.ll +++ b/llvm/test/CodeGen/Thumb/ucmp.ll @@ -1,151 +1,376 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=THUMB1 +; RUN: llc -mtriple=thumbv7-apple-darwin %s -o - | FileCheck %s -check-prefix=THUMB2 +; RUN: llc -mtriple thumbv8.1m.main-none-eabi -mattr=+mve,+lob -o - %s | FileCheck %s --check-prefix=V81M define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { -; CHECK-LABEL: ucmp_8_8: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, #1 -; 
CHECK-NEXT: it hi -; CHECK-NEXT: movhi r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_8_8: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: subs r2, r0, r1 +; THUMB1-NEXT: sbcs r2, r2 +; THUMB1-NEXT: cmp r1, r0 +; THUMB1-NEXT: sbcs r1, r1 +; THUMB1-NEXT: subs r0, r2, r1 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: ucmp_8_8: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it hi +; THUMB2-NEXT: movhi r0, #1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_8_8: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, hi +; V81M-NEXT: it lo +; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 } define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { -; CHECK-LABEL: ucmp_8_16: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, #1 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_8_16: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: subs r2, r0, r1 +; THUMB1-NEXT: sbcs r2, r2 +; THUMB1-NEXT: cmp r1, r0 +; THUMB1-NEXT: sbcs r1, r1 +; THUMB1-NEXT: subs r0, r2, r1 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: ucmp_8_16: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it hi +; THUMB2-NEXT: movhi r0, #1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_8_16: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, hi +; V81M-NEXT: it lo +; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 } define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: ucmp_8_32: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, #1 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_8_32: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: subs r2, r0, r1 +; THUMB1-NEXT: sbcs r2, r2 +; THUMB1-NEXT: cmp r1, r0 +; THUMB1-NEXT: sbcs r1, r1 +; THUMB1-NEXT: subs r0, r2, r1 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: ucmp_8_32: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it hi +; THUMB2-NEXT: movhi r0, #1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_8_32: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, hi +; V81M-NEXT: it lo +; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 } define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: ucmp_8_64: -; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r0, r2 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo.w r12, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs.w r0, r3, r1 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo.w r9, #1 -; CHECK-NEXT: sub.w r0, r9, r12 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_8_64: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, lr} +; THUMB1-NEXT: push {r4, r5, r6, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: movs r5, #0 +; THUMB1-NEXT: subs r6, r0, r2 +; THUMB1-NEXT: mov r6, r1 +; THUMB1-NEXT: sbcs r6, r3 +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: blo .LBB3_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: .LBB3_2: +; 
THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: sbcs r3, r1 +; THUMB1-NEXT: blo .LBB3_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: mov r4, r5 +; THUMB1-NEXT: .LBB3_4: +; THUMB1-NEXT: subs r0, r4, r6 +; THUMB1-NEXT: pop {r4, r5, r6, pc} +; +; THUMB2-LABEL: ucmp_8_64: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: mov.w r9, #0 +; THUMB2-NEXT: sbcs.w r12, r1, r3 +; THUMB2-NEXT: mov.w r12, #0 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r12, #1 +; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: sbcs.w r0, r3, r1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r9, #1 +; THUMB2-NEXT: sub.w r0, r9, r12 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_8_64: +; V81M: @ %bb.0: +; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: sbcs.w r12, r1, r3 +; V81M-NEXT: cset r12, lo +; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: sbcs.w r0, r3, r1 +; V81M-NEXT: cset r0, lo +; V81M-NEXT: sub.w r0, r0, r12 +; V81M-NEXT: bx lr %1 = call i8 @llvm.ucmp(i64 %x, i64 %y) ret i8 %1 } define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { -; CHECK-LABEL: ucmp_8_128: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: add.w lr, sp, #16 -; CHECK-NEXT: ldr r4, [sp, #28] -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: ldm.w lr, {r9, r12, lr} -; CHECK-NEXT: subs.w r6, r0, r9 -; CHECK-NEXT: sbcs.w r6, r1, r12 -; CHECK-NEXT: sbcs.w r6, r2, lr -; CHECK-NEXT: sbcs.w r6, r3, r4 -; CHECK-NEXT: mov.w r6, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r6, #1 -; CHECK-NEXT: subs.w r0, r9, r0 -; CHECK-NEXT: sbcs.w r0, r12, r1 -; CHECK-NEXT: sbcs.w r0, lr, r2 -; CHECK-NEXT: sbcs.w r0, r4, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r5, #1 -; CHECK-NEXT: subs r0, r5, r6 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; THUMB1-LABEL: ucmp_8_128: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, r7, lr} +; THUMB1-NEXT: push {r4, r5, r6, r7, lr} +; THUMB1-NEXT: .pad #20 +; THUMB1-NEXT: sub sp, #20 +; THUMB1-NEXT: str r3, [sp, #16] @ 4-byte Spill +; THUMB1-NEXT: movs r3, #1 +; THUMB1-NEXT: str r3, [sp] @ 4-byte Spill +; THUMB1-NEXT: movs r3, #0 +; THUMB1-NEXT: str r3, [sp, #12] @ 4-byte Spill +; THUMB1-NEXT: ldr r6, [sp, #52] +; THUMB1-NEXT: add r7, sp, #40 +; THUMB1-NEXT: ldm r7, {r3, r5, r7} +; THUMB1-NEXT: subs r4, r0, r3 +; THUMB1-NEXT: str r1, [sp, #4] @ 4-byte Spill +; THUMB1-NEXT: mov r4, r1 +; THUMB1-NEXT: ldr r1, [sp] @ 4-byte Reload +; THUMB1-NEXT: sbcs r4, r5 +; THUMB1-NEXT: str r2, [sp, #8] @ 4-byte Spill +; THUMB1-NEXT: mov r4, r2 +; THUMB1-NEXT: sbcs r4, r7 +; THUMB1-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; THUMB1-NEXT: sbcs r4, r6 +; THUMB1-NEXT: mov r2, r1 +; THUMB1-NEXT: blo .LBB4_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; THUMB1-NEXT: .LBB4_2: +; THUMB1-NEXT: subs r0, r3, r0 +; THUMB1-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; THUMB1-NEXT: sbcs r5, r0 +; THUMB1-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; THUMB1-NEXT: sbcs r7, r0 +; THUMB1-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; THUMB1-NEXT: sbcs r6, r0 +; THUMB1-NEXT: blo .LBB4_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; THUMB1-NEXT: .LBB4_4: +; THUMB1-NEXT: subs r0, r1, r2 +; THUMB1-NEXT: add sp, #20 +; THUMB1-NEXT: pop {r4, r5, r6, r7, pc} +; +; THUMB2-LABEL: ucmp_8_128: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: push {r4, r5, r6, lr} +; THUMB2-NEXT: add.w lr, sp, #16 +; THUMB2-NEXT: ldr r4, [sp, #28] +; THUMB2-NEXT: movs r5, #0 +; THUMB2-NEXT: ldm.w lr, {r9, r12, lr} +; THUMB2-NEXT: subs.w r6, r0, r9 +; THUMB2-NEXT: sbcs.w r6, r1, r12 +; THUMB2-NEXT: sbcs.w r6, r2, lr +; THUMB2-NEXT: sbcs.w r6, r3, 
r4 +; THUMB2-NEXT: mov.w r6, #0 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo r6, #1 +; THUMB2-NEXT: subs.w r0, r9, r0 +; THUMB2-NEXT: sbcs.w r0, r12, r1 +; THUMB2-NEXT: sbcs.w r0, lr, r2 +; THUMB2-NEXT: sbcs.w r0, r4, r3 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo r5, #1 +; THUMB2-NEXT: subs r0, r5, r6 +; THUMB2-NEXT: pop {r4, r5, r6, pc} +; +; V81M-LABEL: ucmp_8_128: +; V81M: @ %bb.0: +; V81M-NEXT: .save {r4, r5, r6, lr} +; V81M-NEXT: push {r4, r5, r6, lr} +; V81M-NEXT: ldrd r5, r4, [sp, #16] +; V81M-NEXT: ldrd lr, r12, [sp, #24] +; V81M-NEXT: subs r6, r0, r5 +; V81M-NEXT: sbcs.w r6, r1, r4 +; V81M-NEXT: sbcs.w r6, r2, lr +; V81M-NEXT: sbcs.w r6, r3, r12 +; V81M-NEXT: cset r6, lo +; V81M-NEXT: subs r0, r5, r0 +; V81M-NEXT: sbcs.w r0, r4, r1 +; V81M-NEXT: sbcs.w r0, lr, r2 +; V81M-NEXT: sbcs.w r0, r12, r3 +; V81M-NEXT: cset r0, lo +; V81M-NEXT: subs r0, r0, r6 +; V81M-NEXT: pop {r4, r5, r6, pc} %1 = call i8 @llvm.ucmp(i128 %x, i128 %y) ret i8 %1 } define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { -; CHECK-LABEL: ucmp_32_32: -; CHECK: @ %bb.0: -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, #1 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r2, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_32_32: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: subs r2, r0, r1 +; THUMB1-NEXT: sbcs r2, r2 +; THUMB1-NEXT: cmp r1, r0 +; THUMB1-NEXT: sbcs r1, r1 +; THUMB1-NEXT: subs r0, r2, r1 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: ucmp_32_32: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs r0, r0, r1 +; THUMB2-NEXT: it hi +; THUMB2-NEXT: movhi r0, #1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r0, #-1 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_32_32: +; V81M: @ %bb.0: +; V81M-NEXT: cmp r0, r1 +; V81M-NEXT: cset r0, hi +; V81M-NEXT: it lo +; V81M-NEXT: movlo.w r0, #-1 +; V81M-NEXT: bx lr %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 } define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: ucmp_32_64: -; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r0, r2 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo.w r12, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs.w r0, r3, r1 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo.w r9, #1 -; CHECK-NEXT: sub.w r0, r9, r12 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_32_64: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, lr} +; THUMB1-NEXT: push {r4, r5, r6, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: movs r5, #0 +; THUMB1-NEXT: subs r6, r0, r2 +; THUMB1-NEXT: mov r6, r1 +; THUMB1-NEXT: sbcs r6, r3 +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: blo .LBB6_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: .LBB6_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: sbcs r3, r1 +; THUMB1-NEXT: blo .LBB6_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: mov r4, r5 +; THUMB1-NEXT: .LBB6_4: +; THUMB1-NEXT: subs r0, r4, r6 +; THUMB1-NEXT: pop {r4, r5, r6, pc} +; +; THUMB2-LABEL: ucmp_32_64: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: mov.w r9, #0 +; THUMB2-NEXT: sbcs.w r12, r1, r3 +; THUMB2-NEXT: mov.w r12, #0 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r12, #1 +; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: sbcs.w r0, r3, r1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r9, #1 +; THUMB2-NEXT: sub.w r0, r9, r12 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_32_64: +; V81M: @ %bb.0: +; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: sbcs.w r12, r1, r3 +; 
V81M-NEXT: cset r12, lo +; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: sbcs.w r0, r3, r1 +; V81M-NEXT: cset r0, lo +; V81M-NEXT: sub.w r0, r0, r12 +; V81M-NEXT: bx lr %1 = call i32 @llvm.ucmp(i64 %x, i64 %y) ret i32 %1 } define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { -; CHECK-LABEL: ucmp_64_64: -; CHECK: @ %bb.0: -; CHECK-NEXT: subs.w r12, r0, r2 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo.w r12, #1 -; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs.w r0, r3, r1 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo.w r9, #1 -; CHECK-NEXT: sub.w r0, r9, r12 -; CHECK-NEXT: asrs r1, r0, #31 -; CHECK-NEXT: bx lr +; THUMB1-LABEL: ucmp_64_64: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: .save {r4, r5, r6, lr} +; THUMB1-NEXT: push {r4, r5, r6, lr} +; THUMB1-NEXT: movs r4, #1 +; THUMB1-NEXT: movs r5, #0 +; THUMB1-NEXT: subs r6, r0, r2 +; THUMB1-NEXT: mov r6, r1 +; THUMB1-NEXT: sbcs r6, r3 +; THUMB1-NEXT: mov r6, r4 +; THUMB1-NEXT: blo .LBB7_2 +; THUMB1-NEXT: @ %bb.1: +; THUMB1-NEXT: mov r6, r5 +; THUMB1-NEXT: .LBB7_2: +; THUMB1-NEXT: subs r0, r2, r0 +; THUMB1-NEXT: sbcs r3, r1 +; THUMB1-NEXT: blo .LBB7_4 +; THUMB1-NEXT: @ %bb.3: +; THUMB1-NEXT: mov r4, r5 +; THUMB1-NEXT: .LBB7_4: +; THUMB1-NEXT: subs r0, r4, r6 +; THUMB1-NEXT: asrs r1, r0, #31 +; THUMB1-NEXT: pop {r4, r5, r6, pc} +; +; THUMB2-LABEL: ucmp_64_64: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: subs.w r12, r0, r2 +; THUMB2-NEXT: mov.w r9, #0 +; THUMB2-NEXT: sbcs.w r12, r1, r3 +; THUMB2-NEXT: mov.w r12, #0 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r12, #1 +; THUMB2-NEXT: subs r0, r2, r0 +; THUMB2-NEXT: sbcs.w r0, r3, r1 +; THUMB2-NEXT: it lo +; THUMB2-NEXT: movlo.w r9, #1 +; THUMB2-NEXT: sub.w r0, r9, r12 +; THUMB2-NEXT: asrs r1, r0, #31 +; THUMB2-NEXT: bx lr +; +; V81M-LABEL: ucmp_64_64: +; V81M: @ %bb.0: +; V81M-NEXT: subs.w r12, r0, r2 +; V81M-NEXT: sbcs.w r12, r1, r3 +; V81M-NEXT: cset r12, lo +; V81M-NEXT: subs r0, r2, r0 +; V81M-NEXT: sbcs.w r0, r3, r1 +; V81M-NEXT: cset r0, lo +; V81M-NEXT: sub.w r0, r0, r12 +; V81M-NEXT: asrs r1, r0, #31 +; V81M-NEXT: bx lr %1 = call i64 @llvm.ucmp(i64 %x, i64 %y) ret i64 %1 }
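
Illustrative usage sketch (not part of the patch above): the new custom lowering is reached only through the llvm.scmp/llvm.ucmp intrinsics, so a minimal standalone reproducer is just an intrinsic call compiled for a plain 32-bit ARM target. The function name and the armv7-eabi triple below are placeholders chosen for illustration; per the comments in LowerSCMP and the updated checks in llvm/test/CodeGen/ARM/scmp.ll, an i32 signed three-way compare in ARM mode (non-Thumb1, no MVE) is expected to select the subs/movwgt/mvnlt sequence instead of the earlier compare-plus-two-conditional-moves-plus-subtract form.

; Assumed invocation: llc -mtriple=armv7-eabi reproducer.ll -o -
define i32 @scmp_example(i32 %x, i32 %y) nounwind {
  %r = call i32 @llvm.scmp(i32 %x, i32 %y)   ; lowered via ARMTargetLowering::LowerSCMP
  ret i32 %r
}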