diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 0d2feeb4edea3..7ea44130a637d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -9310,6 +9310,26 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + + // Some v_interp instrutions use op_sel[3] for dst. + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOpSel); + + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + // Check if op_sel[3] is set, which is meant for dst. + if ((OpSel & (1 << 3)) != 0) { + int ModIdx = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); + + ModVal |= SISrcMods::DST_OP_SEL; + + Inst.getOperand(ModIdx).setImm(ModVal); + } + } } void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index ee8683a549a80..8ecde94b19cc1 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1299,6 +1299,14 @@ void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned, O << " op_sel:[" << FI << ',' << BC << ']'; return; } + if (Opc == AMDGPU::V_INTERP_P2_F16_opsel_gfx9) { + int ModIdx = + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + uint32_t ModVal = MI->getOperand(ModIdx).getImm(); + if (ModVal & SISrcMods::DST_OP_SEL) + O << " op_sel:[0,0,0,1]"; + return; + } printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O); } diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index f4b6af647ca1a..7a58ea40e82da 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -75,6 +75,7 @@ class VOP3Interp pattern = []> : VOP3_Pseudo { let AsmMatchConverter = "cvtVOP3Interp"; let mayRaiseFPException = 0; + let VOP3_OPSEL = P.HasOpSel; } def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> { @@ -97,16 +98,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> { let HasSrc0Mods = 0; } -class getInterp16Asm { +class getInterp16Asm { string src2 = !if(HasSrc2, ", $src2_modifiers", ""); string omod = !if(HasOMod, "$omod", ""); + string opsel = !if(OpSel, "$op_sel", ""); string ret = - " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod; + " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel; } class getInterp16Ins { - dag ret = !if(HasSrc2, + Operand Src0Mod, Operand Src2Mod, bit OpSel> { + dag ret1 = !if(HasSrc2, !if(HasOMod, (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, InterpAttr:$attr, InterpAttrChan:$attrchan, @@ -121,19 +123,22 @@ class getInterp16Ins ArgVT> : VOPProfile { +class VOP3_INTERP16 ArgVT, bit OpSel = 0> : VOPProfile { let IsSingle = 1; let HasOMod = !ne(DstVT.Value, f16.Value); let HasHigh = 1; + let HasOpSel = OpSel; let Src0Mod = FPVRegInputMods; let Src2Mod = FPVRegInputMods; let Outs64 = (outs DstRC.RegClass:$vdst); - let Ins64 = getInterp16Ins.ret; - let Asm64 = getInterp16Asm.ret; + let Ins64 = getInterp16Ins.ret; + let Asm64 = getInterp16Asm.ret; } //===----------------------------------------------------------------------===// @@ -459,7 +464,7 @@ let SubtargetPredicate = isGFX9Plus in { defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>; defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>; let OtherPredicates = [isNotGFX90APlus] in -def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>; +def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>; } // End SubtargetPredicate = isGFX9Plus // This predicate should only apply to the selection pattern. The @@ -2624,6 +2629,14 @@ multiclass VOP3Interp_F16_Real_gfx9 op, string OpName, string AsmName> } } +multiclass VOP3Interp_F16_OpSel_Real_gfx9 op, string OpName, string AsmName> { + def _gfx9 : VOP3_Real(OpName), SIEncodingFamily.GFX9>, + VOP3Interp_OpSel_gfx9 (OpName).Pfl> { + VOP3_Pseudo ps = !cast(OpName); + let AsmString = AsmName # ps.AsmOperands; + } +} + multiclass VOP3_Real_gfx9 op, string AsmName> { def _gfx9 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>, VOP3e_vi (NAME#"_e64").Pfl> { @@ -2736,7 +2749,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">; defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">; defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">; defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">; -defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">; +defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">; defm V_ADD_I32 : VOP3_Real_vi <0x29c>; defm V_SUB_I32 : VOP3_Real_vi <0x29d>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 3cad5a1c2c377..cf91f81540e27 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -421,6 +421,10 @@ class VOP3a_ScaleSel_gfx1250 op, VOPProfile p> : VOP3e_gfx11_gfx12 op, VOPProfile p> : VOP3Interp_vi { + let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0); +} + class VOP3Interp_gfx10 op, VOPProfile p> : VOP3e_gfx10 { bits<6> attr; bits<2> attrchan; diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index a61b0c87e199f..0a43cb9615461 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -566,6 +566,36 @@ v_interp_p2_f16 v5, v2, attr0.x, v3 clamp // NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU // VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04] +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + +v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] +// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04] +// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand. + v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04] // NOGCN: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt index 802d6368507e2..35b02602e39df 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt @@ -19311,6 +19311,15 @@ # CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04] 0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04 +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 high op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04] +0x05,0x40,0x77,0xd2,0x00,0x05,0x0e,0x04 + +# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp op_sel:[0,0,0,1] ; encoding: [0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04] +0x05,0xc0,0x77,0xd2,0x00,0x04,0x0e,0x04 + # CHECK: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00] 0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00