@@ -652,6 +652,60 @@ void test_prefetch(generic void *fptr, global void *gptr) {
652
652
__builtin_amdgcn_global_prefetch (gptr , 8 );
653
653
}
654
654
655
+ // CHECK-LABEL: @test_cvt_pk_fp8_f32_e5m3(
656
+ // CHECK-NEXT: entry:
657
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
658
+ // CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
659
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
660
+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
661
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
662
+ // CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
663
+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
664
+ // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
665
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
666
+ // CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
667
+ // CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
668
+ // CHECK-NEXT: store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
669
+ // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
670
+ // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
671
+ // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
672
+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float [[TMP0]], float [[TMP1]], i32 [[TMP2]], i1 true)
673
+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
674
+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
675
+ // CHECK-NEXT: ret void
676
+ //
677
// Codegen test for __builtin_amdgcn_cvt_pk_fp8_f32_e5m3.
// Calls the builtin with the two float inputs (a, b), the previous value
// (old) and a constant `true` as the fourth argument, then stores the int
// result through `out`. The FileCheck directives preceding this function
// expect a single call to @llvm.amdgcn.cvt.pk.fp8.f32.e5m3 whose last operand
// is `i1 true`.
// NOTE(review): the body is kept to one statement because the expected IR is
// matched line by line; adding locals would change the emitted allocas.
+ void test_cvt_pk_fp8_f32_e5m3 (global int * out , int old , float a , float b )
678
+ {
679
+ * out = __builtin_amdgcn_cvt_pk_fp8_f32_e5m3 (a , b , old , true);
680
+ }
681
+
682
+ // CHECK-LABEL: @test_cvt_sr_fp8_f32_e5m3(
683
+ // CHECK-NEXT: entry:
684
+ // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
685
+ // CHECK-NEXT: [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
686
+ // CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
687
+ // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
688
+ // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
689
+ // CHECK-NEXT: [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
690
+ // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
691
+ // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
692
+ // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
693
+ // CHECK-NEXT: store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
694
+ // CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
695
+ // CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
696
+ // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
697
+ // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
698
+ // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
699
+ // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 3)
700
+ // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
701
+ // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
702
+ // CHECK-NEXT: ret void
703
+ //
704
// Codegen test for __builtin_amdgcn_cvt_sr_fp8_f32_e5m3.
// Calls the builtin with a float input (a), an int input (b), the previous
// value (old) and the literal 3 as the fourth argument, then stores the int
// result through `out`. The FileCheck directives preceding this function
// expect a single call to @llvm.amdgcn.cvt.sr.fp8.f32.e5m3 whose last operand
// is `i32 3`.
// NOTE(review): the meaning of the literal 3 is not visible here - confirm
// against the builtin's definition before changing it.
+ void test_cvt_sr_fp8_f32_e5m3 (global int * out , int old , float a , int b )
705
+ {
706
+ * out = __builtin_amdgcn_cvt_sr_fp8_f32_e5m3 (a , b , old , 3 );
707
+ }
708
+
655
709
// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
656
710
// CHECK-NEXT: entry:
657
711
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
0 commit comments