From 85d98f0a8ab6166fc6120065f39678a5174ca8bb Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Sun, 3 Aug 2025 05:06:23 -0500 Subject: [PATCH 1/2] [X86] Correct 32-bit immediate assertion and fix 64-bit lowering for huge frame offsets (#123872) The assertion previously did not work correctly because the operand was being truncated to an `int` prior to comparison. Change the assertion into a reported error as suggested in https://github.com/llvm/llvm-project/pull/101840#issuecomment-2304992425 by @arsenm Finally, fix the lowering on 64-bit targets so that offsets larger than 32-bit are correctly addressed and add tests for various reported issues. --- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 2 +- llvm/lib/Target/X86/X86FrameLowering.cpp | 11 ++- llvm/lib/Target/X86/X86RegisterInfo.cpp | 41 ++++++++-- llvm/lib/Target/X86/X86RegisterInfo.h | 5 ++ llvm/test/CodeGen/X86/avx512f-large-stack.ll | 23 ++++++ llvm/test/CodeGen/X86/huge-stack.ll | 72 +++++++++++++--- .../X86/large-displacements-fastisel.ll | 18 ++++ llvm/test/CodeGen/X86/large-displacements.ll | 82 +++++++++++++++++++ .../test/CodeGen/X86/merge-huge-sp-updates.ll | 4 +- .../CodeGen/X86/stack-clash-extra-huge.ll | 28 +++---- llvm/test/CodeGen/X86/stack-clash-huge.ll | 36 ++++---- .../CodeGen/X86/win64-stackprobe-overflow.ll | 2 +- 12 files changed, 269 insertions(+), 55 deletions(-) create mode 100644 llvm/test/CodeGen/X86/avx512f-large-stack.ll create mode 100644 llvm/test/CodeGen/X86/large-displacements-fastisel.ll create mode 100644 llvm/test/CodeGen/X86/large-displacements.ll diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index f66f54682c84c..7b751bacf5556 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1550,7 +1550,7 @@ void PEIImpl::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, // If this instruction has a FrameIndex operand, we need to // use that target machine register
info object to eliminate // it. - TRI.eliminateFrameIndex(MI, SPAdj, i); + TRI.eliminateFrameIndex(MI, SPAdj, i, RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 95ed5908e2314..cba7843d53e3f 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/EHPersonalities.h" @@ -2678,7 +2679,7 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, // object. // We need to factor in additional offsets applied during the prologue to the // frame, base, and stack pointer depending on which is used. - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); + int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); const X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t StackSize = MFI.getStackSize(); @@ -4212,6 +4213,14 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized( // emitPrologue if it gets called and emits CFI. MF.setHasWinCFI(false); + MachineFrameInfo &MFI = MF.getFrameInfo(); + // If the frame is big enough that we might need to scavenge a register to + // handle huge offsets, reserve a stack slot for that now. + if (!isInt<32>(MFI.estimateStackSize(MF))) { + int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false); + RS->addScavengingFrameIndex(FI); + } + // If we are using Windows x64 CFI, ensure that the stack is always 8 byte // aligned. The format doesn't support misaligned stack adjustments. 
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 83b11eede829e..20c6e61da5fd3 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86RegisterInfo.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86FrameLowering.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -21,8 +22,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TileShapeInfo.h" @@ -907,7 +908,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Determine base register and offset. - int FIOffset; + int64_t FIOffset; Register BasePtr; if (MI.isReturn()) { assert((!hasStackRealignment(MF) || @@ -958,11 +959,37 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } if (MI.getOperand(FIOperandNum+3).isImm()) { - // Offset is a 32-bit integer. - int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); - int Offset = FIOffset + Imm; - assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && - "Requesting 64-bit offset in 32-bit immediate!"); + int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm(); + int64_t Offset = FIOffset + Imm; + bool FitsIn32Bits = isInt<32>(Offset); + // If the offset will not fit in a 32-bit displacement, then for 64-bit + // targets, scavenge a register to hold it. Otherwise... 
+ if (Is64Bit && !FitsIn32Bits) { + assert(RS && "RegisterScavenger was NULL"); + const X86InstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + + RS->enterBasicBlockEnd(MBB); + RS->backward(std::next(II)); + + Register ScratchReg = RS->scavengeRegisterBackwards( + X86::GR64RegClass, II, /*RestoreAfter=*/false, /*SPAdj=*/0, + /*AllowSpill=*/true); + assert(ScratchReg != 0 && "scratch reg was 0"); + RS->setRegUsed(ScratchReg); + + BuildMI(MBB, II, DL, TII->get(X86::MOV64ri), ScratchReg).addImm(Offset); + + MI.getOperand(FIOperandNum + 3).setImm(0); + MI.getOperand(FIOperandNum + 2).setReg(ScratchReg); + + return false; + } + + // ... for 32-bit targets, this is a bug! + if (!Is64Bit && !FitsIn32Bits) + MI.emitGenericError("64-bit offset calculated but target is 32-bit"); + if (Offset != 0 || !tryOptimizeLEAtoMOV(II)) MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); } else { diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 19b409ae619d2..2f4c55cfad6d2 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86REGISTERINFO_H #define LLVM_LIB_TARGET_X86_X86REGISTERINFO_H +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_REGINFO_HEADER @@ -180,6 +181,10 @@ class X86RegisterInfo final : public X86GenRegisterInfo { constrainRegClassToNonRex2(const TargetRegisterClass *RC) const; bool isNonRex2RegClass(const TargetRegisterClass *RC) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } }; } // End llvm namespace diff --git a/llvm/test/CodeGen/X86/avx512f-large-stack.ll b/llvm/test/CodeGen/X86/avx512f-large-stack.ll new file mode 100644 index 0000000000000..326f72b8e6d8b --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512f-large-stack.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 4 +; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f < %s | FileCheck %s --check-prefix=CHECK +define void @f(i16 %LGV2, i1 %LGV3) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %BB +; CHECK-NEXT: subq $2147483528, %rsp # imm = 0x7FFFFF88 +; CHECK-NEXT: .cfi_def_cfa_offset 2147483536 +; CHECK-NEXT: movb %sil, %cl +; CHECK-NEXT: movw %di, %ax +; CHECK-NEXT: movswq %ax, %rax +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movabsq $-2147483768, %rdx # imm = 0xFFFFFFFF7FFFFF88 +; CHECK-NEXT: movb %cl, (%rsp,%rdx) +; CHECK-NEXT: addq $2147483528, %rsp # imm = 0x7FFFFF88 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +BB: + %A = alloca i1, i33 2147483648, align 1 + %G = getelementptr i1, ptr %A, i16 %LGV2 + %G4 = getelementptr i1, ptr %G, i32 -2147483648 + store i1 %LGV3, ptr %G4, align 1 + ret void +} diff --git a/llvm/test/CodeGen/X86/huge-stack.ll b/llvm/test/CodeGen/X86/huge-stack.ll index 920033ba1182c..41b8a0141b63d 100644 --- a/llvm/test/CodeGen/X86/huge-stack.ll +++ b/llvm/test/CodeGen/X86/huge-stack.ll @@ -5,20 +5,70 @@ define void @foo() unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E +; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88 ; CHECK-NEXT: subq %rax, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8589934470 -; CHECK-NEXT: movb $42, -129(%rsp) -; CHECK-NEXT: movb $43, -128(%rsp) -; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E +; CHECK-NEXT: .cfi_def_cfa_offset 8589934480 +; CHECK-NEXT: movabsq $4294967177, %rax # imm = 0xFFFFFF89 +; CHECK-NEXT: movb $42, (%rsp,%rax) +; CHECK-NEXT: movb $43, -118(%rsp) +; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88 ; CHECK-NEXT: addq %rax, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq - %1 = alloca %large, align 1 - %2 = alloca %large, align 1 - %3 = getelementptr inbounds %large, ptr %1, i64 0, i64 0 - store i8 42, ptr %3, align 1 - %4 = 
getelementptr inbounds %large, ptr %2, i64 0, i64 0 - store i8 43, ptr %4, align 1 + %large1 = alloca %large, align 1 + %large2 = alloca %large, align 1 + %ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0 + store i8 42, ptr %ptrLarge1, align 1 + %ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0 + store i8 43, ptr %ptrLarge2, align 1 ret void } + +declare ptr @baz(ptr, ptr, ptr, ptr) + +define ptr @scavenge_spill() unnamed_addr #0 { +; CHECK-LABEL: scavenge_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $25769803816, %rax # imm = 0x600000028 +; CHECK-NEXT: subq %rax, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 25769803824 +; CHECK-NEXT: movabsq $21474836521, %rax # imm = 0x500000029 +; CHECK-NEXT: leaq (%rsp,%rax), %rdi +; CHECK-NEXT: movabsq $17179869226, %rax # imm = 0x40000002A +; CHECK-NEXT: leaq (%rsp,%rax), %rsi +; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movabsq $12884901931, %rax # imm = 0x30000002B +; CHECK-NEXT: leaq (%rsp,%rax), %rdx +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movabsq $8589934636, %rax # imm = 0x20000002C +; CHECK-NEXT: leaq (%rsp,%rax), %rcx +; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: callq baz@PLT +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: leaq 46(%rsp), %rdi +; CHECK-NEXT: callq baz@PLT +; CHECK-NEXT: # kill: def $rcx killed $rax +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-NEXT: movabsq $25769803816, %rcx # imm = 0x600000028 +; CHECK-NEXT: addq %rcx, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %large1 = alloca %large, align 1 + %ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0 + 
%large2 = alloca %large, align 1 + %ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0 + %large3 = alloca %large, align 1 + %ptrLarge3 = getelementptr inbounds %large, ptr %large3, i64 0, i64 0 + %large4 = alloca %large, align 1 + %ptrLarge4 = getelementptr inbounds %large, ptr %large4, i64 0, i64 0 + %large5 = alloca %large, align 1 + %ptrLarge5 = getelementptr inbounds %large, ptr %large5, i64 0, i64 0 + %ret1 = call ptr @baz(ptr %ptrLarge1, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4) + %large6 = alloca %large, align 1 + %ptrLarge6 = getelementptr inbounds %large, ptr %large6, i64 0, i64 0 + %ret2 = call ptr @baz(ptr %ptrLarge6, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4) + ret ptr %ret1 +} diff --git a/llvm/test/CodeGen/X86/large-displacements-fastisel.ll b/llvm/test/CodeGen/X86/large-displacements-fastisel.ll new file mode 100644 index 0000000000000..4177466b8f74a --- /dev/null +++ b/llvm/test/CodeGen/X86/large-displacements-fastisel.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64 -O=0 | FileCheck %s +@G = global i8 0 + +; Regression test for PR113856 - incorrect FastISel assert + +define i32 @main() { +; CHECK-LABEL: main: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $-2147483652, %rax # imm = 0xFFFFFFFF7FFFFFFC +; CHECK-NEXT: movl $0, (%rsp,%rax) +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq + %1 = alloca i32, align 4 + %G = getelementptr i8, ptr %1, i32 -2147483648 + store i32 0, ptr %G, align 4 + ret i32 0 +} diff --git a/llvm/test/CodeGen/X86/large-displacements.ll b/llvm/test/CodeGen/X86/large-displacements.ll new file mode 100644 index 0000000000000..8935ec07bb3fa --- /dev/null +++ b/llvm/test/CodeGen/X86/large-displacements.ll @@ -0,0 +1,82 @@ +; RUN: not llc < %s -mtriple=i686 -filetype=null 2>&1 | FileCheck %s -check-prefix=ERR-i686 +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s -check-prefix=x86_64 + +; Regression 
test for #121932, #113856, #106352, #69365, #25051 which are caused by +; an incorrectly written assertion for 64-bit offsets when compiling for 32-bit X86. + +define i32 @main() #0 { +; ERR-i686: error: :0:0: 64-bit offset calculated but target is 32-bit +; +; x86_64-LABEL: main: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movl $4294967192, %eax # imm = 0xFFFFFF98 +; x86_64-NEXT: subq %rax, %rsp +; x86_64-NEXT: .cfi_def_cfa_offset 4294967200 +; x86_64-NEXT: movabsq $3221225318, %rax # imm = 0xBFFFFF66 +; x86_64-NEXT: movb $32, (%rsp,%rax) +; x86_64-NEXT: movb $33, 2147483494(%rsp) +; x86_64-NEXT: movb $34, 1073741670(%rsp) +; x86_64-NEXT: movb $35, -154(%rsp) +; x86_64-NEXT: xorl %eax, %eax +; x86_64-NEXT: movl $4294967192, %ecx # imm = 0xFFFFFF98 +; x86_64-NEXT: addq %rcx, %rsp +; x86_64-NEXT: .cfi_def_cfa_offset 8 +; x86_64-NEXT: retq +entry: + %a = alloca [1073741824 x i8], align 16 + %b = alloca [1073741824 x i8], align 16 + %c = alloca [1073741824 x i8], align 16 + %d = alloca [1073741824 x i8], align 16 + + %arrayida = getelementptr inbounds [1073741824 x i8], ptr %a, i64 0, i64 -42 + %arrayidb = getelementptr inbounds [1073741824 x i8], ptr %b, i64 0, i64 -42 + %arrayidc = getelementptr inbounds [1073741824 x i8], ptr %c, i64 0, i64 -42 + %arrayidd = getelementptr inbounds [1073741824 x i8], ptr %d, i64 0, i64 -42 + + store i8 32, ptr %arrayida, align 2 + store i8 33, ptr %arrayidb, align 2 + store i8 34, ptr %arrayidc, align 2 + store i8 35, ptr %arrayidd, align 2 + + ret i32 0 +} + +; Same test as above but for an anonymous function. 
+define i32 @0() #0 { +; ERR-i686: error: :0:0: 64-bit offset calculated but target is 32-bit +; +; x86_64-LABEL: __unnamed_1: +; x86_64: # %bb.0: # %entry +; x86_64-NEXT: movl $4294967192, %eax # imm = 0xFFFFFF98 +; x86_64-NEXT: subq %rax, %rsp +; x86_64-NEXT: .cfi_def_cfa_offset 4294967200 +; x86_64-NEXT: movabsq $3221225318, %rax # imm = 0xBFFFFF66 +; x86_64-NEXT: movb $32, (%rsp,%rax) +; x86_64-NEXT: movb $33, 2147483494(%rsp) +; x86_64-NEXT: movb $34, 1073741670(%rsp) +; x86_64-NEXT: movb $35, -154(%rsp) +; x86_64-NEXT: xorl %eax, %eax +; x86_64-NEXT: movl $4294967192, %ecx # imm = 0xFFFFFF98 +; x86_64-NEXT: addq %rcx, %rsp +; x86_64-NEXT: .cfi_def_cfa_offset 8 +; x86_64-NEXT: retq +entry: + %a = alloca [1073741824 x i8], align 16 + %b = alloca [1073741824 x i8], align 16 + %c = alloca [1073741824 x i8], align 16 + %d = alloca [1073741824 x i8], align 16 + + %arrayida = getelementptr inbounds [1073741824 x i8], ptr %a, i64 0, i64 -42 + %arrayidb = getelementptr inbounds [1073741824 x i8], ptr %b, i64 0, i64 -42 + %arrayidc = getelementptr inbounds [1073741824 x i8], ptr %c, i64 0, i64 -42 + %arrayidd = getelementptr inbounds [1073741824 x i8], ptr %d, i64 0, i64 -42 + + store i8 32, ptr %arrayida, align 2 + store i8 33, ptr %arrayidb, align 2 + store i8 34, ptr %arrayidc, align 2 + store i8 35, ptr %arrayidd, align 2 + + ret i32 0 +} + +attributes #0 = { optnone noinline } diff --git a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll index b26345e2d5bbc..6920e74d3ec5a 100644 --- a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll +++ b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll @@ -22,8 +22,8 @@ entry: call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs) ; CHECK: call{{.*}}bar ; CHECK: addq{{.*}}$2147483647, %rsp -; CHECK: addq{{.*}}$372037585, %rsp -; CHECK: .cfi_adjust_cfa_offset -2519521232 +; CHECK: addq{{.*}}$372037601, %rsp +; CHECK: .cfi_adjust_cfa_offset -2519521248 
ret void } diff --git a/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll index d9b20f50e9a88..4c8bb62e87dcd 100644 --- a/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll +++ b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll @@ -16,13 +16,13 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X64-NEXT: cmpq %r11, %rsp ; CHECK-X64-NEXT: jne .LBB0_1 ; CHECK-X64-NEXT: # %bb.2: -; CHECK-X64-NEXT: subq $3976, %rsp # imm = 0xF88 +; CHECK-X64-NEXT: subq $3992, %rsp # imm = 0xF98 ; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp -; CHECK-X64-NEXT: .cfi_def_cfa_offset 4799999888 -; CHECK-X64-NEXT: movl $1, 264(%rsp) -; CHECK-X64-NEXT: movl $1, 28664(%rsp) -; CHECK-X64-NEXT: movl -128(%rsp), %eax -; CHECK-X64-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88 +; CHECK-X64-NEXT: .cfi_def_cfa_offset 4799999904 +; CHECK-X64-NEXT: movl $1, 280(%rsp) +; CHECK-X64-NEXT: movl $1, 28680(%rsp) +; CHECK-X64-NEXT: movl -112(%rsp), %eax +; CHECK-X64-NEXT: movabsq $4799999896, %rcx # imm = 0x11E1A2F98 ; CHECK-X64-NEXT: addq %rcx, %rsp ; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 ; CHECK-X64-NEXT: retq @@ -30,10 +30,10 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X86-LABEL: foo: ; CHECK-X86: # %bb.0: ; CHECK-X86-NEXT: ud2 -; CHECK-X86-NEXT: .cfi_def_cfa_offset 4800000016 -; CHECK-X86-NEXT: movl $1, 392(%esp) -; CHECK-X86-NEXT: movl $1, 28792(%esp) -; CHECK-X86-NEXT: movl (%esp), %eax +; CHECK-X86-NEXT: .cfi_def_cfa_offset 4800000032 +; CHECK-X86-NEXT: movl $1, 408(%esp) +; CHECK-X86-NEXT: movl $1, 28808(%esp) +; CHECK-X86-NEXT: movl 16(%esp), %eax ; CHECK-X86-NEXT: ud2 ; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 ; CHECK-X86-NEXT: retl @@ -41,10 +41,10 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X32-LABEL: foo: ; CHECK-X32: # %bb.0: ; CHECK-X32-NEXT: ud2 -; CHECK-X32-NEXT: .cfi_def_cfa_offset 4799999888 -; CHECK-X32-NEXT: movl $1, 264(%esp) -; CHECK-X32-NEXT: movl $1, 28664(%esp) -; CHECK-X32-NEXT: movl -128(%esp), %eax +; CHECK-X32-NEXT: 
.cfi_def_cfa_offset 4799999904 +; CHECK-X32-NEXT: movl $1, 280(%esp) +; CHECK-X32-NEXT: movl $1, 28680(%esp) +; CHECK-X32-NEXT: movl -112(%esp), %eax ; CHECK-X32-NEXT: ud2 ; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 ; CHECK-X32-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-clash-huge.ll b/llvm/test/CodeGen/X86/stack-clash-huge.ll index c9990773201f0..0e8c2155c6501 100644 --- a/llvm/test/CodeGen/X86/stack-clash-huge.ll +++ b/llvm/test/CodeGen/X86/stack-clash-huge.ll @@ -16,13 +16,13 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X64-NEXT: cmpq %r11, %rsp ; CHECK-X64-NEXT: jne .LBB0_1 ; CHECK-X64-NEXT: # %bb.2: -; CHECK-X64-NEXT: subq $1928, %rsp # imm = 0x788 +; CHECK-X64-NEXT: subq $1944, %rsp # imm = 0x798 ; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp -; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999888 -; CHECK-X64-NEXT: movl $1, 264(%rsp) -; CHECK-X64-NEXT: movl $1, 28664(%rsp) -; CHECK-X64-NEXT: movl -128(%rsp), %eax -; CHECK-X64-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788 +; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999904 +; CHECK-X64-NEXT: movl $1, 280(%rsp) +; CHECK-X64-NEXT: movl $1, 28680(%rsp) +; CHECK-X64-NEXT: movl -112(%rsp), %eax +; CHECK-X64-NEXT: movl $2399999896, %ecx # imm = 0x8F0D1798 ; CHECK-X64-NEXT: addq %rcx, %rsp ; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 ; CHECK-X64-NEXT: retq @@ -39,13 +39,13 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X86-NEXT: cmpl %eax, %esp ; CHECK-X86-NEXT: jne .LBB0_1 ; CHECK-X86-NEXT: # %bb.2: -; CHECK-X86-NEXT: subl $2060, %esp # imm = 0x80C +; CHECK-X86-NEXT: subl $2076, %esp # imm = 0x81C ; CHECK-X86-NEXT: .cfi_def_cfa_register %esp -; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000016 -; CHECK-X86-NEXT: movl $1, 392(%esp) -; CHECK-X86-NEXT: movl $1, 28792(%esp) -; CHECK-X86-NEXT: movl (%esp), %eax -; CHECK-X86-NEXT: movl $2400000012, %ecx # imm = 0x8F0D180C +; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000032 +; CHECK-X86-NEXT: movl $1, 408(%esp) +; CHECK-X86-NEXT: movl $1, 28808(%esp) +; CHECK-X86-NEXT: 
movl 16(%esp), %eax +; CHECK-X86-NEXT: movl $2400000028, %ecx # imm = 0x8F0D181C ; CHECK-X86-NEXT: addl %ecx, %esp ; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 ; CHECK-X86-NEXT: retl @@ -62,13 +62,13 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-X32-NEXT: cmpl %r11d, %esp ; CHECK-X32-NEXT: jne .LBB0_1 ; CHECK-X32-NEXT: # %bb.2: -; CHECK-X32-NEXT: subl $1928, %esp # imm = 0x788 +; CHECK-X32-NEXT: subl $1944, %esp # imm = 0x798 ; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp -; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999888 -; CHECK-X32-NEXT: movl $1, 264(%esp) -; CHECK-X32-NEXT: movl $1, 28664(%esp) -; CHECK-X32-NEXT: movl -128(%esp), %eax -; CHECK-X32-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788 +; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999904 +; CHECK-X32-NEXT: movl $1, 280(%esp) +; CHECK-X32-NEXT: movl $1, 28680(%esp) +; CHECK-X32-NEXT: movl -112(%esp), %eax +; CHECK-X32-NEXT: movl $2399999896, %ecx # imm = 0x8F0D1798 ; CHECK-X32-NEXT: addl %ecx, %esp ; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 ; CHECK-X32-NEXT: retq diff --git a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll index 9555ce032db90..732fc6543e314 100644 --- a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll +++ b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll @@ -10,5 +10,5 @@ start: attributes #0 = { nonlazybind uwtable "probe-stack"="probe_stack" "target-cpu"="x86-64" } ; CHECK-LABEL: foo: -; CHECK: movabsq $4294967304, %rax +; CHECK: movabsq $4294967312, %rax ; CHECK-NEXT: callq probe_stack From 3518f93e7efb3541838cbc91dd4ad8966b95b376 Mon Sep 17 00:00:00 2001 From: Wesley Wiser Date: Wed, 6 Aug 2025 01:35:09 +0000 Subject: [PATCH 2/2] Insert trap for 64-bit offsets on X86 32-bit targets This prevents the verify instruction pass from reporting the offsets as erroneous when we've already reported the error to the user. Add `-verify-machineinstrs` to tests for large displacements to catch similar issues.
--- llvm/lib/Target/X86/X86RegisterInfo.cpp | 10 +++++++--- llvm/test/CodeGen/X86/avx512f-large-stack.ll | 2 +- llvm/test/CodeGen/X86/large-displacements-fastisel.ll | 2 +- llvm/test/CodeGen/X86/large-displacements.ll | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 20c6e61da5fd3..ef917f72f39c8 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -959,6 +959,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } if (MI.getOperand(FIOperandNum+3).isImm()) { + const X86InstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const DebugLoc &DL = MI.getDebugLoc(); int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm(); int64_t Offset = FIOffset + Imm; bool FitsIn32Bits = isInt<32>(Offset); @@ -966,8 +968,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // targets, scavenge a register to hold it. Otherwise... if (Is64Bit && !FitsIn32Bits) { assert(RS && "RegisterScavenger was NULL"); - const X86InstrInfo *TII = MF.getSubtarget().getInstrInfo(); - const DebugLoc &DL = MI.getDebugLoc(); RS->enterBasicBlockEnd(MBB); RS->backward(std::next(II)); @@ -987,8 +987,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // ... for 32-bit targets, this is a bug! - if (!Is64Bit && !FitsIn32Bits) + if (!Is64Bit && !FitsIn32Bits) { MI.emitGenericError("64-bit offset calculated but target is 32-bit"); + // Trap so that the instruction verification pass does not fail if run. 
+ BuildMI(MBB, MBBI, DL, TII->get(X86::TRAP)); + return false; + } if (Offset != 0 || !tryOptimizeLEAtoMOV(II)) MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); diff --git a/llvm/test/CodeGen/X86/avx512f-large-stack.ll b/llvm/test/CodeGen/X86/avx512f-large-stack.ll index 326f72b8e6d8b..3cb5391c56abf 100644 --- a/llvm/test/CodeGen/X86/avx512f-large-stack.ll +++ b/llvm/test/CodeGen/X86/avx512f-large-stack.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 4 -; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK define void @f(i16 %LGV2, i1 %LGV3) { ; CHECK-LABEL: f: ; CHECK: # %bb.0: # %BB diff --git a/llvm/test/CodeGen/X86/large-displacements-fastisel.ll b/llvm/test/CodeGen/X86/large-displacements-fastisel.ll index 4177466b8f74a..362b1b5da092d 100644 --- a/llvm/test/CodeGen/X86/large-displacements-fastisel.ll +++ b/llvm/test/CodeGen/X86/large-displacements-fastisel.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64 -O=0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 -O=0 -verify-machineinstrs | FileCheck %s @G = global i8 0 ; Regression test for PR113856 - incorrect FastISel assert diff --git a/llvm/test/CodeGen/X86/large-displacements.ll b/llvm/test/CodeGen/X86/large-displacements.ll index 8935ec07bb3fa..d7085a56edbb9 100644 --- a/llvm/test/CodeGen/X86/large-displacements.ll +++ b/llvm/test/CodeGen/X86/large-displacements.ll @@ -1,5 +1,5 @@ -; RUN: not llc < %s -mtriple=i686 -filetype=null 2>&1 | FileCheck %s -check-prefix=ERR-i686 -; RUN: llc < %s -mtriple=x86_64 | FileCheck %s -check-prefix=x86_64 +; RUN: not llc < %s -mtriple=i686 -filetype=null -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=ERR-i686 +; RUN: llc < %s 
-mtriple=x86_64 -verify-machineinstrs | FileCheck %s -check-prefix=x86_64 ; Regression test for #121932, #113856, #106352, #69365, #25051 which are caused by ; an incorrectly written assertion for 64-bit offsets when compiling for 32-bit X86.