Commit cbc0b82

[GR-66718] Use proper SIMD encoding in AMD64ConvertFloatToIntegerOp
PullRequest: graal/21282
2 parents: 54d69dc + d72b6dc

1 file changed (AMD64ConvertFloatToIntegerOp.java): 96 additions, 54 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, 2025, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,20 +26,26 @@
 package jdk.graal.compiler.lir.amd64;
 
 import static jdk.graal.compiler.asm.amd64.AMD64Assembler.SSEOp.UCOMIS;
+import static jdk.graal.compiler.asm.amd64.AMD64Assembler.SSEOp.XOR;
+import static jdk.graal.compiler.asm.amd64.AMD64Assembler.VexRMOp.VUCOMISD;
+import static jdk.graal.compiler.asm.amd64.AMD64Assembler.VexRMOp.VUCOMISS;
+import static jdk.graal.compiler.asm.amd64.AMD64Assembler.VexRVMOp.VPXOR;
 import static jdk.graal.compiler.asm.amd64.AMD64BaseAssembler.OperandSize;
+import static jdk.graal.compiler.asm.amd64.AVXKind.AVXSize.XMM;
 import static jdk.graal.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL;
 import static jdk.graal.compiler.lir.LIRInstruction.OperandFlag.REG;
 import static jdk.vm.ci.code.ValueUtil.asRegister;
 
 import jdk.graal.compiler.asm.Label;
 import jdk.graal.compiler.asm.amd64.AMD64Address;
 import jdk.graal.compiler.asm.amd64.AMD64Assembler;
+import jdk.graal.compiler.asm.amd64.AMD64Assembler.AMD64SIMDInstructionEncoding;
 import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
-import jdk.graal.compiler.asm.amd64.AVXKind;
 import jdk.graal.compiler.debug.GraalError;
 import jdk.graal.compiler.lir.LIRInstructionClass;
 import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
 import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
+import jdk.vm.ci.amd64.AMD64;
 import jdk.vm.ci.amd64.AMD64Kind;
 import jdk.vm.ci.code.Register;
 import jdk.vm.ci.meta.JavaConstant;
@@ -58,26 +64,51 @@ public class AMD64ConvertFloatToIntegerOp extends AMD64LIRInstruction {
 
     @Def({REG}) protected Value dstValue;
     @Alive({REG}) protected Value srcValue;
-    @Temp({REG, ILLEGAL}) protected Value tmpValue;
+    @Temp({REG, ILLEGAL}) protected Value zeroTmp;
 
-    private final OpcodeEmitter opcode;
+    private final OpcodeEmitter opcodeEmitter;
     private final boolean canBeNaN;
     private final boolean canOverflow;
+    private final int integerBytes;
+    /**
+     * The size of the input float operand. We only emit scalar instructions, but UCOMIS wants to be
+     * encoded with a packed size.
+     */
+    private final OperandSize packedSize;
+    /** The size of the input float operand. */
+    private final OperandSize scalarSize;
+    private final AMD64SIMDInstructionEncoding encoding;
 
     @FunctionalInterface
     public interface OpcodeEmitter {
         /** Emit the actual conversion instruction. */
        void emit(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register dst, Register src);
     }
 
-    public AMD64ConvertFloatToIntegerOp(LIRGeneratorTool tool, OpcodeEmitter opcode, Value dstValue, Value srcValue, boolean canBeNaN, boolean canOverflow) {
+    public AMD64ConvertFloatToIntegerOp(LIRGeneratorTool tool, OpcodeEmitter opcodeEmitter, Value dstValue, Value srcValue, boolean canBeNaN,
+                    boolean canOverflow) {
         super(TYPE);
+        this.opcodeEmitter = opcodeEmitter;
         this.dstValue = dstValue;
         this.srcValue = srcValue;
-        this.opcode = opcode;
-        this.tmpValue = canOverflow ? tool.newVariable(srcValue.getValueKind()) : Value.ILLEGAL;
+        this.zeroTmp = canOverflow ? tool.newVariable(srcValue.getValueKind()) : Value.ILLEGAL;
         this.canBeNaN = canBeNaN;
         this.canOverflow = canOverflow;
+        this.integerBytes = dstValue.getPlatformKind().getSizeInBytes();
+        GraalError.guarantee(integerBytes == 4 || integerBytes == 8, "unexpected target %s", dstValue);
+        switch (srcValue.getPlatformKind().getSizeInBytes()) {
+            case 4:
+                this.packedSize = OperandSize.PS;
+                this.scalarSize = OperandSize.SS;
+                break;
+            case 8:
+                this.packedSize = OperandSize.PD;
+                this.scalarSize = OperandSize.SD;
+                break;
+            default:
+                throw GraalError.shouldNotReachHere("unexpected input %s".formatted(srcValue));
+        }
+        this.encoding = AMD64SIMDInstructionEncoding.forFeatures(((AMD64) tool.target().arch).getFeatures());
 
         GraalError.guarantee(srcValue.getPlatformKind() instanceof AMD64Kind kind && kind.getVectorLength() == 1 && kind.isXMM(), "source must be scalar floating-point: %s", srcValue);
         GraalError.guarantee(dstValue.getPlatformKind() instanceof AMD64Kind kind && kind.getVectorLength() == 1 && kind.isInteger(), "destination must be integer: %s", dstValue);
@@ -90,21 +121,13 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
         Label fixupPath = new Label();
         Label done = new Label();
 
-        opcode.emit(crb, masm, dst, src);
+        opcodeEmitter.emit(crb, masm, dst, src);
 
         if (!canBeNaN && !canOverflow) {
             /* No fixup needed. */
             return;
         }
 
-        int integerBytes = dstValue.getPlatformKind().getSizeInBytes();
-        GraalError.guarantee(integerBytes == 4 || integerBytes == 8, "unexpected target %s", dstValue);
-        OperandSize floatSize = switch (srcValue.getPlatformKind().getSizeInBytes()) {
-            case 4 -> OperandSize.PS;
-            case 8 -> OperandSize.PD;
-            default -> throw GraalError.shouldNotReachHere("unexpected input %s".formatted(srcValue));
-        };
-
         /*
          * if (dst == MIN_VALUE) { goto fixupPath; }
          */
@@ -117,48 +140,67 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
 
         crb.getLIR().addSlowPath(this, () -> {
             masm.bind(fixupPath);
+            emitFixups(masm, src, dst, done);
+            masm.jmp(done);
+        });
 
-            if (canBeNaN) {
-                /*
-                 * if (isNaN(src)) { result = 0; goto done; }
-                 *
-                 * The isNaN check is implemented as src != src. C2's fixup stubs check for a NaN
-                 * bit pattern directly, using the same number of cycles but using an extra general
-                 * purpose register.
-                 */
-                Label isNotNaN = new Label();
-                UCOMIS.emit(masm, floatSize, src, src);
-                masm.jcc(AMD64Assembler.ConditionFlag.NoParity, isNotNaN, true);
-                masm.moveInt(dst, 0);
-                masm.jmp(done);
-                masm.bind(isNotNaN);
-            }
+        masm.bind(done);
+    }
+
+    @SuppressWarnings("unused")
+    private void emitFixups(AMD64MacroAssembler masm, Register src, Register dst, Label done) {
+        if (canBeNaN) {
+            /*
+             * if (isNaN(src)) { result = 0; goto done; }
+             *
+             * The isNaN check is implemented as src != src. C2's fixup stubs check for a NaN bit
+             * pattern directly, using the same number of cycles but using an extra general purpose
+             * register.
+             */
+            Label isNotNaN = new Label();
+            compare(masm, src, src);
+            masm.jcc(AMD64Assembler.ConditionFlag.NoParity, isNotNaN, true);
+            masm.moveInt(dst, 0);
+            masm.jmp(done);
+            masm.bind(isNotNaN);
+        }
 
-            if (canOverflow) {
-                /*
-                 * if (src > 0.0) { result = MAX_VALUE; }
-                 *
-                 * We use an actual floating point compare, C2's stubs check the sign bit in a GPR.
-                 */
-                Register zero = asRegister(tmpValue);
-                masm.pxor(AVXKind.AVXSize.XMM, zero, zero);
-                UCOMIS.emit(masm, floatSize, src, zero);
-                masm.jcc(AMD64Assembler.ConditionFlag.BelowEqual, done);
-                /*
-                 * MAX_VALUE is the bitwise negation of MIN_VALUE, which is already in dst. A
-                 * negation takes the same number of cycles as a move, but its encoding is shorter.
-                 */
-                if (integerBytes == 4) {
-                    masm.notl(dst);
-                } else {
-                    masm.notq(dst);
-                }
+        if (canOverflow) {
+            /*
+             * if (src > 0.0) { result = MAX_VALUE; }
+             *
+             * We use an actual floating point compare, C2's stubs check the sign bit in a GPR.
+             */
+            Register zero = asRegister(zeroTmp);
+            clearRegister(masm, zero);
+            compare(masm, src, zero);
+            masm.jcc(AMD64Assembler.ConditionFlag.BelowEqual, done);
+            /*
+             * MAX_VALUE is the bitwise negation of MIN_VALUE, which is already in dst. A negation
+             * takes the same number of cycles as a move, but its encoding is shorter.
+             */
+            if (integerBytes == 4) {
+                masm.notl(dst);
+            } else {
+                masm.notq(dst);
            }
+        }
+    }
 
-            /* Return to inline code. */
-            masm.jmp(done);
-        });
+    private void clearRegister(AMD64MacroAssembler masm, Register register) {
+        if (masm.isAVX()) {
+            VPXOR.encoding(encoding).emit(masm, XMM, register, register, register);
+        } else {
+            XOR.emit(masm, packedSize, register, register);
+        }
+    }
 
-        masm.bind(done);
+    private void compare(AMD64MacroAssembler masm, Register x, Register y) {
+        if (masm.isAVX()) {
+            var ucomis = scalarSize == OperandSize.SS ? VUCOMISS : VUCOMISD;
+            ucomis.encoding(encoding).emit(masm, XMM, x, y);
+        } else {
+            UCOMIS.emit(masm, packedSize, x, y);
+        }
     }
 }
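
Background on what the fixup code (now moved into emitFixups) computes: the AMD64 truncating conversions (CVTTSS2SI and friends) return the "integer indefinite" value, i.e. Integer.MIN_VALUE or Long.MIN_VALUE, for NaN and out-of-range inputs, while Java requires (int) NaN == 0 and saturation to MIN_VALUE/MAX_VALUE on overflow. Below is a rough Java-level sketch of the semantics the fast path plus fixup stub implement for the 32-bit case; the class and method names (FloatToIntSemantics, convertFloatToInt, rawCvttss2si) are made up for illustration and are not part of the compiler sources.

    // Hypothetical illustration only; not Graal code.
    final class FloatToIntSemantics {

        /** What the emitted fast path + fixup stub compute for a float -> int conversion. */
        static int convertFloatToInt(float src) {
            int dst = rawCvttss2si(src);      // fast path; MIN_VALUE for NaN and out-of-range inputs
            if (dst == Integer.MIN_VALUE) {   // only then is the slow path taken
                if (src != src) {             // NaN check, the same src != src trick as the emitted compare
                    dst = 0;                  // Java: (int) NaN == 0
                } else if (src > 0.0f) {      // positive overflow
                    dst = ~dst;               // ~MIN_VALUE == MAX_VALUE, matching the notl/notq in the stub
                }
                // negative overflow (or a genuine MIN_VALUE result) keeps MIN_VALUE
            }
            return dst;
        }

        /** Models CVTTSS2SI: NaN and unrepresentable results yield the "integer indefinite" value. */
        static int rawCvttss2si(float f) {
            if (Float.isNaN(f) || f >= 0x1p31f || f < -0x1p31f) {
                return Integer.MIN_VALUE;
            }
            return (int) f; // in range: plain truncation toward zero
        }
    }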
