[NFC][AMDGPU] Add an IR test for v_cvt_f16_bf8#149627
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) ChangesThis was left during the upstream. Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com> Full diff: https://github.com/llvm/llvm-project/pull/149627.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
index 25889ded91681..95653148b09f4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.ll
@@ -9,6 +9,172 @@ declare half @llvm.amdgcn.cvt.f16.fp8(i32, i32)
declare <2 x half> @llvm.amdgcn.cvt.pk.f16.bf8(i16)
declare <2 x half> @llvm.amdgcn.cvt.pk.f16.fp8(i16)
+define amdgpu_ps float @test_cvt_f16_bf8_byte0(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 0)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+define amdgpu_ps float @test_cvt_f16_bf8_byte1(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 1)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+define amdgpu_ps float @test_cvt_f16_bf8_byte2(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 2)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+define amdgpu_ps float @test_cvt_f16_bf8_byte3(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
+; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
+ %ret = fpext half %cvt to float
+ ret float %ret
+}
+
+define amdgpu_ps float @test_cvt_f16_bf8_byte3_hi(i32 %a) {
+; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-SDAG-REAL16: ; %bb.0:
+; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.h, v0 byte_sel:3
+; GFX1250-SDAG-REAL16-NEXT: v_mov_b16_e32 v0.l, 0
+; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-SDAG-FAKE16: ; %bb.0:
+; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, 0, 0x5040100
+; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-GISEL-REAL16: ; %bb.0:
+; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
+; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-REAL16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
+; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
+; GFX1250-GISEL-FAKE16: ; %bb.0:
+; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
+; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
+; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
+ %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
+ %ins.0 = insertelement <2 x half> undef, half 0.0, i32 0
+ %ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1
+ %ret = bitcast <2 x half> %ins.1 to float
+ ret float %ret
+}
+
define amdgpu_ps float @test_cvt_f16_fp8_byte0(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.f16.fp8.llThe following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}Please refer to the Undefined Behavior Manual for more information. |
4adb8ff to
46a6dd7
Compare
This was left during the upstream. Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>
46a6dd7 to
c0d9363
Compare
This was left during the upstream. Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>

This was left during the upstream.
Co-authored-by: Mekhanoshin, Stanislav Stanislav.Mekhanoshin@amd.com