-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[AMDGPU][MC] GFX9 - allow op_sel in v_interp_p2_f16 #150712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[AMDGPU][MC] GFX9 - allow op_sel in v_interp_p2_f16 #150712
Conversation
AMDGPU documentation states op_sel[3] can be used in v_interp_p2_f16 in GFX9.
@llvm/pr-subscribers-mc Author: Jun Wang (jwanggit86) ChangesAMDGPU documentation states op_sel[3] can be used in v_interp_p2_f16 in GFX9. Full diff: https://github.com/llvm/llvm-project/pull/150712.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5bb0e3648d2ec..d7ce772ab1ec6 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9164,6 +9164,26 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOModSI);
+
+ // Some v_interp instrutions use op_sel[3] for dst.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOpSel);
+
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ // Check if op_sel[3] is set, which is meant for dst.
+ if ((OpSel & (1 << 3)) != 0) {
+ int ModIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+
+ ModVal |= SISrcMods::DST_OP_SEL;
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+ }
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 197bb3f0b569b..b2aa407302c95 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1293,6 +1293,14 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
O << " op_sel:[" << FI << ',' << BC << ']';
return;
}
+ if (Opc == AMDGPU::V_INTERP_P2_F16_opsel_gfx9) {
+ int ModIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ uint32_t ModVal = MI->getOperand(ModIdx).getImm();
+ if (ModVal & SISrcMods::DST_OP_SEL)
+ O << " op_sel:[0,0,0,1]";
+ return;
+ }
printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index aee2f2cb3d771..fcc9961b722ea 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -67,6 +67,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
let mayRaiseFPException = 0;
+ let VOP3_OPSEL = P.HasOpSel;
}
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
@@ -89,16 +90,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let HasSrc0Mods = 0;
}
-class getInterp16Asm <bit HasSrc2, bit HasOMod> {
+class getInterp16Asm <bit HasSrc2, bit HasOMod, bit OpSel> {
string src2 = !if(HasSrc2, ", $src2_modifiers", "");
string omod = !if(HasOMod, "$omod", "");
+ string opsel = !if(OpSel, "$op_sel", "");
string ret =
- " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
+ " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel;
}
class getInterp16Ins <bit HasSrc2, bit HasOMod,
- Operand Src0Mod, Operand Src2Mod> {
- dag ret = !if(HasSrc2,
+ Operand Src0Mod, Operand Src2Mod, bit OpSel> {
+ dag ret1 = !if(HasSrc2,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
InterpAttr:$attr, InterpAttrChan:$attrchan,
@@ -113,19 +115,22 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
InterpAttr:$attr, InterpAttrChan:$attrchan,
highmod:$high, Clamp0:$clamp, omod0:$omod)
);
+ dag ret2 = !if(OpSel, (ins op_sel0:$op_sel), (ins));
+ dag ret = !con(ret1, ret2);
}
-class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
+class VOP3_INTERP16 <list<ValueType> ArgVT, bit OpSel = 0> : VOPProfile<ArgVT> {
let IsSingle = 1;
let HasOMod = !ne(DstVT.Value, f16.Value);
let HasHigh = 1;
+ let HasOpSel = OpSel;
let Src0Mod = FPVRegInputMods;
let Src2Mod = FPVRegInputMods;
let Outs64 = (outs DstRC.RegClass:$vdst);
- let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
- let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
+ let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod, OpSel>.ret;
+ let Asm64 = getInterp16Asm<HasSrc2, HasOMod, OpSel>.ret;
}
//===----------------------------------------------------------------------===//
@@ -435,7 +440,7 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
let OtherPredicates = [isNotGFX90APlus] in
-def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
+def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>;
} // End SubtargetPredicate = isGFX9Plus
// This predicate should only apply to the selection pattern. The
@@ -2241,6 +2246,14 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName>
}
}
+multiclass VOP3Interp_F16_OpSel_Real_gfx9<bits<10> op, string OpName, string AsmName> {
+ def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
+ VOP3Interp_OpSel_gfx9 <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
+ let AsmString = AsmName # ps.AsmOperands;
+ }
+}
+
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
@@ -2353,7 +2366,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
-defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
+defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">;
defm V_ADD_I32 : VOP3_Real_vi <0x29c>;
defm V_SUB_I32 : VOP3_Real_vi <0x29d>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index c21e2d38398fa..64f0a32d0c0a4 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -401,6 +401,10 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
let Inst{49-41} = src0;
}
+class VOP3Interp_OpSel_gfx9<bits<10> op, VOPProfile p> : VOP3Interp_vi<op, p> {
+ let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
+}
+
class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
bits<6> attr;
bits<2> attrchan;
diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s
index a61b0c87e199f..0a43cb9615461 100644
--- a/llvm/test/MC/AMDGPU/vop3-gfx9.s
+++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s
@@ -566,6 +566,36 @@ v_interp_p2_f16 v5, v2, attr0.x, v3 clamp
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
v_interp_p2_legacy_f16 v5, v2, attr31.x, v3
// GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04]
// NOGCN: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
index 802d6368507e2..35b02602e39df 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
@@ -19311,6 +19311,15 @@
# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04]
0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04]
+0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04
+
# CHECK: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00]
0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00
|
@llvm/pr-subscribers-backend-amdgpu Author: Jun Wang (jwanggit86) ChangesAMDGPU documentation states op_sel[3] can be used in v_interp_p2_f16 in GFX9. Full diff: https://github.com/llvm/llvm-project/pull/150712.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5bb0e3648d2ec..d7ce772ab1ec6 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9164,6 +9164,26 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOModSI);
+
+ // Some v_interp instrutions use op_sel[3] for dst.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOpSel);
+
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ // Check if op_sel[3] is set, which is meant for dst.
+ if ((OpSel & (1 << 3)) != 0) {
+ int ModIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+
+ ModVal |= SISrcMods::DST_OP_SEL;
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+ }
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 197bb3f0b569b..b2aa407302c95 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1293,6 +1293,14 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
O << " op_sel:[" << FI << ',' << BC << ']';
return;
}
+ if (Opc == AMDGPU::V_INTERP_P2_F16_opsel_gfx9) {
+ int ModIdx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ uint32_t ModVal = MI->getOperand(ModIdx).getImm();
+ if (ModVal & SISrcMods::DST_OP_SEL)
+ O << " op_sel:[0,0,0,1]";
+ return;
+ }
printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index aee2f2cb3d771..fcc9961b722ea 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -67,6 +67,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
let mayRaiseFPException = 0;
+ let VOP3_OPSEL = P.HasOpSel;
}
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
@@ -89,16 +90,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let HasSrc0Mods = 0;
}
-class getInterp16Asm <bit HasSrc2, bit HasOMod> {
+class getInterp16Asm <bit HasSrc2, bit HasOMod, bit OpSel> {
string src2 = !if(HasSrc2, ", $src2_modifiers", "");
string omod = !if(HasOMod, "$omod", "");
+ string opsel = !if(OpSel, "$op_sel", "");
string ret =
- " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
+ " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel;
}
class getInterp16Ins <bit HasSrc2, bit HasOMod,
- Operand Src0Mod, Operand Src2Mod> {
- dag ret = !if(HasSrc2,
+ Operand Src0Mod, Operand Src2Mod, bit OpSel> {
+ dag ret1 = !if(HasSrc2,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
InterpAttr:$attr, InterpAttrChan:$attrchan,
@@ -113,19 +115,22 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
InterpAttr:$attr, InterpAttrChan:$attrchan,
highmod:$high, Clamp0:$clamp, omod0:$omod)
);
+ dag ret2 = !if(OpSel, (ins op_sel0:$op_sel), (ins));
+ dag ret = !con(ret1, ret2);
}
-class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
+class VOP3_INTERP16 <list<ValueType> ArgVT, bit OpSel = 0> : VOPProfile<ArgVT> {
let IsSingle = 1;
let HasOMod = !ne(DstVT.Value, f16.Value);
let HasHigh = 1;
+ let HasOpSel = OpSel;
let Src0Mod = FPVRegInputMods;
let Src2Mod = FPVRegInputMods;
let Outs64 = (outs DstRC.RegClass:$vdst);
- let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
- let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
+ let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod, OpSel>.ret;
+ let Asm64 = getInterp16Asm<HasSrc2, HasOMod, OpSel>.ret;
}
//===----------------------------------------------------------------------===//
@@ -435,7 +440,7 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
let OtherPredicates = [isNotGFX90APlus] in
-def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
+def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>;
} // End SubtargetPredicate = isGFX9Plus
// This predicate should only apply to the selection pattern. The
@@ -2241,6 +2246,14 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName>
}
}
+multiclass VOP3Interp_F16_OpSel_Real_gfx9<bits<10> op, string OpName, string AsmName> {
+ def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
+ VOP3Interp_OpSel_gfx9 <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
+ let AsmString = AsmName # ps.AsmOperands;
+ }
+}
+
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
@@ -2353,7 +2366,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
-defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
+defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">;
defm V_ADD_I32 : VOP3_Real_vi <0x29c>;
defm V_SUB_I32 : VOP3_Real_vi <0x29d>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index c21e2d38398fa..64f0a32d0c0a4 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -401,6 +401,10 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
let Inst{49-41} = src0;
}
+class VOP3Interp_OpSel_gfx9<bits<10> op, VOPProfile p> : VOP3Interp_vi<op, p> {
+ let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
+}
+
class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
bits<6> attr;
bits<2> attrchan;
diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s
index a61b0c87e199f..0a43cb9615461 100644
--- a/llvm/test/MC/AMDGPU/vop3-gfx9.s
+++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s
@@ -566,6 +566,36 @@ v_interp_p2_f16 v5, v2, attr0.x, v3 clamp
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
v_interp_p2_legacy_f16 v5, v2, attr31.x, v3
// GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04]
// NOGCN: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
index 802d6368507e2..35b02602e39df 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
@@ -19311,6 +19311,15 @@
# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04]
0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04]
+0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04
+
# CHECK: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00]
0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00
|
According to llvm/llvm-project#150712, op_sel[3] can be used in v_interp_p2_f16 in GFX9. Implement that for ACO plus make use of it via a peephole optimization in the optimizer. Noticed small perf gain in Cyberpunk.
AMDGPU documentation states op_sel[3] can be used in v_interp_p2_f16 in GFX9.