Skip to content

[AMDGPU][MC] GFX9 - allow op_sel in v_interp_p2_f16 #150712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9164,6 +9164,26 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOModSI);

// Some v_interp instructions use op_sel[3] for dst.
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOpSel);

int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

// Check if op_sel[3] is set, which is meant for dst.
if ((OpSel & (1 << 3)) != 0) {
int ModIdx =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
uint32_t ModVal = Inst.getOperand(ModIdx).getImm();

ModVal |= SISrcMods::DST_OP_SEL;

Inst.getOperand(ModIdx).setImm(ModVal);
}
}
}

void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,14 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
O << " op_sel:[" << FI << ',' << BC << ']';
return;
}
if (Opc == AMDGPU::V_INTERP_P2_F16_opsel_gfx9) {
int ModIdx =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
uint32_t ModVal = MI->getOperand(ModIdx).getImm();
if (ModVal & SISrcMods::DST_OP_SEL)
O << " op_sel:[0,0,0,1]";
return;
}

printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
}
Expand Down
31 changes: 22 additions & 9 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
let mayRaiseFPException = 0;
let VOP3_OPSEL = P.HasOpSel;
}

def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
Expand All @@ -89,16 +90,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let HasSrc0Mods = 0;
}

class getInterp16Asm <bit HasSrc2, bit HasOMod> {
class getInterp16Asm <bit HasSrc2, bit HasOMod, bit OpSel> {
string src2 = !if(HasSrc2, ", $src2_modifiers", "");
string omod = !if(HasOMod, "$omod", "");
string opsel = !if(OpSel, "$op_sel", "");
string ret =
" $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
" $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel;
}

class getInterp16Ins <bit HasSrc2, bit HasOMod,
Operand Src0Mod, Operand Src2Mod> {
dag ret = !if(HasSrc2,
Operand Src0Mod, Operand Src2Mod, bit OpSel> {
dag ret1 = !if(HasSrc2,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
InterpAttr:$attr, InterpAttrChan:$attrchan,
Expand All @@ -113,19 +115,22 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
InterpAttr:$attr, InterpAttrChan:$attrchan,
highmod:$high, Clamp0:$clamp, omod0:$omod)
);
dag ret2 = !if(OpSel, (ins op_sel0:$op_sel), (ins));
dag ret = !con(ret1, ret2);
}

class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
class VOP3_INTERP16 <list<ValueType> ArgVT, bit OpSel = 0> : VOPProfile<ArgVT> {
let IsSingle = 1;
let HasOMod = !ne(DstVT.Value, f16.Value);
let HasHigh = 1;
let HasOpSel = OpSel;

let Src0Mod = FPVRegInputMods;
let Src2Mod = FPVRegInputMods;

let Outs64 = (outs DstRC.RegClass:$vdst);
let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod, OpSel>.ret;
let Asm64 = getInterp16Asm<HasSrc2, HasOMod, OpSel>.ret;
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -435,7 +440,7 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
let OtherPredicates = [isNotGFX90APlus] in
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>;
} // End SubtargetPredicate = isGFX9Plus

// This predicate should only apply to the selection pattern. The
Expand Down Expand Up @@ -2241,6 +2246,14 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName>
}
}

// Defines the GFX9 real (encoded) instruction `_gfx9` for a VOP3 interp
// pseudo, using the op_sel-aware VOP3Interp_OpSel_gfx9 encoding class.
//   op      - 10-bit opcode forwarded to VOP3Interp_OpSel_gfx9.
//   OpName  - record name of the VOP3_Pseudo this real instruction is built
//             from; its profile (Pfl) drives the encoding.
//   AsmName - assembly mnemonic, prepended to the pseudo's AsmOperands.
multiclass VOP3Interp_F16_OpSel_Real_gfx9<bits<10> op, string OpName, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
VOP3Interp_OpSel_gfx9 <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
// Bind the pseudo locally so its operand string can be reused below.
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
// The mnemonic is supplied separately from the pseudo's record name, so the
// asm string is rebuilt from AsmName plus the pseudo's operand list.
let AsmString = AsmName # ps.AsmOperands;
}
}

multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
Expand Down Expand Up @@ -2353,7 +2366,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">;

defm V_ADD_I32 : VOP3_Real_vi <0x29c>;
defm V_SUB_I32 : VOP3_Real_vi <0x29d>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,10 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
let Inst{49-41} = src0;
}

// VOP3 interp encoding with op_sel support: inherits everything from
// VOP3Interp_vi and additionally sets instruction bit 14 from bit 3 of
// src0_modifiers when the profile has a destination (p.HasDst); otherwise
// bit 14 is encoded as 0.
// NOTE(review): bit 3 of src0_modifiers is presumably SISrcMods::DST_OP_SEL
// (the op_sel bit that selects the destination half) -- confirm against
// SIDefines.h.
class VOP3Interp_OpSel_gfx9<bits<10> op, VOPProfile p> : VOP3Interp_vi<op, p> {
let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
}

class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
bits<6> attr;
bits<2> attrchan;
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/MC/AMDGPU/vop3-gfx9.s
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,36 @@ v_interp_p2_f16 v5, v2, attr0.x, v3 clamp
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]

v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1]
// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.

v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1]
// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.

v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1]
// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.

v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,1]
// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.

v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1]
// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04]
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.

v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1]
// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04]
// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.

v_interp_p2_legacy_f16 v5, v2, attr31.x, v3
// GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04]
// NOGCN: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19311,6 +19311,15 @@
# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04]
0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04

# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04

# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04]
0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04

# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04]
0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04

# CHECK: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00]
0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00

Expand Down