From f9075098fc1e7741f7ec96053a7586be03dd9ff9 Mon Sep 17 00:00:00 2001 From: Pravin Jagtap Date: Tue, 16 Jan 2024 09:34:12 -0500 Subject: [PATCH] AMDGPU: Add v_prng_b32 instruction for gfx950 Rand num instruction for stochastic rounding. --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 + clang/test/CodeGenOpenCL/amdgpu-features.cl | 2 +- .../builtins-amdgcn-gfx950-err.cl | 16 ++++++ .../CodeGenOpenCL/builtins-amdgcn-gfx950.cl | 21 +++++++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 ++ llvm/lib/Target/AMDGPU/AMDGPU.td | 10 ++++ .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 6 ++ .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 5 ++ llvm/lib/TargetParser/TargetParser.cpp | 1 + llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll | 32 +++++++++++ llvm/test/MC/AMDGPU/gfx950_asm_vop1.s | 57 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s | 31 ++++++++++ .../Disassembler/AMDGPU/gfx950_dasm_vop1.txt | 43 ++++++++++++++ .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 18 ++++++ 16 files changed, 251 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_vop1.s create mode 100644 llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 8f44afa4059386..61516eb2a4a723 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -522,5 +522,7 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs", TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", 
"gfx12-insts,wavefrontsize64") TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64") +TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 5c324032b51956..61cbf5e65d0d21 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -89,7 +89,7 @@ // GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl new file mode 100644 index 00000000000000..86f4f73c81c0fc --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -emit-llvm \ +// RUN: -verify -o - %s +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 
-triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \ +// RUN: -verify -o - %s +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \ +// RUN: -verify -o - %s +// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \ +// RUN: -verify -o - %s + + +// REQUIRES: amdgpu-registered-target + +typedef unsigned int uint; +void test_prng_b32(global uint* out, uint a) { + *out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}} +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl new file mode 100644 index 00000000000000..f31ba85a52a7ad --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl @@ -0,0 +1,21 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s +// REQUIRES: amdgpu-registered-target + +typedef unsigned int uint; + +// CHECK-LABEL: @test_prng_b32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4 +// CHECK-NEXT: ret void +// +void test_prng_b32(global uint* out, uint a) { + *out = __builtin_amdgcn_prng_b32(a); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td 
b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 4829453ee57cd2..ed73f0a69e6130 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -594,6 +594,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; +def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic< + [llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>, ClangBuiltin<"__builtin_amdgcn_prng_b32">; + } // TargetPrefix = "amdgcn" // New-style image intrinsics diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index e84fdf54866cdd..09f8dde07b740e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -978,6 +978,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order", "VMEM instructions of the same type write VGPR results in order" >; +def FeaturePrngInst : SubtargetFeature<"prng-inst", + "HasPrngInst", + "true", + "Has v_prng_b32 instruction" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -1498,6 +1504,7 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureFP8ConversionInsts, FeatureCvtFP8VOP1Bug, FeatureGFX950Insts, + FeaturePrngInst ])>; def FeatureISAVersion9_4_0 : FeatureSet< @@ -2350,6 +2357,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">, def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">, AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>; +def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">, + AssemblerPredicate<(all_of FeaturePrngInst)>; + def HasGDS : Predicate<"Subtarget->hasGDS()">; def HasGWS : Predicate<"Subtarget->hasGWS()">; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 8beb9defee66a0..28d215e7b3de9f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -1253,6 +1253,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { break; } + case Intrinsic::amdgcn_prng_b32: { + auto *Src = II.getArgOperand(0); + if (isa<UndefValue>(Src)) { + return IC.replaceInstUsesWith(II, Src); + } + } } if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 415c068367074f..03e57db9c11ce5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4515,6 +4515,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_pk_u8_f32: case Intrinsic::amdgcn_alignbyte: case Intrinsic::amdgcn_perm: + case Intrinsic::amdgcn_prng_b32: case Intrinsic::amdgcn_fdot2: case Intrinsic::amdgcn_sdot2: case Intrinsic::amdgcn_udot2: diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 2e7a06a15bd52a..e722e046092fda 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -220,7 +220,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasSALUFloatInsts = false; bool HasPseudoScalarTrans = false; bool HasRestrictedSOffset = false; - + bool HasPrngInst = false; bool HasVcmpxPermlaneHazard = false; bool HasVMEMtoScalarWriteHazard = false; bool HasSMEMtoVectorWriteHazard = false; @@ -1321,6 +1321,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// instruction. 
unsigned maxHardClauseLength() const { return MaxHardClauseLength; } + bool hasPrngInst() const { return HasPrngInst; } + /// Return the maximum number of waves per SIMD for kernels using \p SGPRs /// SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index f7a66a08209397..e99f562688926d 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -761,6 +761,9 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>; } // End SubtargetPredicate = isGFX11Plus +let SubtargetPredicate = HasPrngInst in +defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>; + foreach vt = Reg32Types.types in { def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)), (vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0))) @@ -1516,6 +1519,8 @@ defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; +defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>; + class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat < (vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)), (Inst VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))> { diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index b0385915f3042b..b236e26f495dfd 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -470,6 +470,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["gws"] = true; break; case GK_GFX950: + Features["prng-inst"] = true; Features["gfx950-insts"] = true; [[fallthrough]]; case GK_GFX942: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll new file mode 100644 index 00000000000000..eeef4eeb65a694 --- /dev/null +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll @@ -0,0 +1,32 @@ +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.prng.b32(i32) #0 + +; GCN-LABEL: {{^}}prng_b32: +; GCN: v_prng_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +define amdgpu_kernel void @prng_b32(ptr addrspace(1) %out, i32 %src) #1 { + %prng = call i32 @llvm.amdgcn.prng.b32(i32 %src) #0 + store i32 %prng, ptr addrspace(1) %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}prng_b32_constant_4 +; GCN: v_prng_b32_e32 {{v[0-9]+}}, 4 +define amdgpu_kernel void @prng_b32_constant_4(ptr addrspace(1) %out) #1 { + %prng = call i32 @llvm.amdgcn.prng.b32(i32 4) #0 + store i32 %prng, ptr addrspace(1) %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}prng_b32_constant_100 +; GCN: v_prng_b32_e32 {{v[0-9]+}}, 0x64 +define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 { + %prng = call i32 @llvm.amdgcn.prng.b32(i32 100) #0 + store i32 %prng, ptr addrspace(1) %out, align 4 + ret void +} + + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } \ No newline at end of file diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s new file mode 100644 index 00000000000000..0cb292ffe63dde --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop1.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s + +v_prng_b32 v5, v1 +// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, v255 +// GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, s1 +// GFX950: v_prng_b32_e32 v5, s1 ; 
encoding: [0x01,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, s101 +// GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, vcc_lo +// GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, vcc_hi +// GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, ttmp15 +// GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, m0 +// GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, exec_lo +// GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, exec_hi +// GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, -1 +// GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, 0.5 +// GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v5, src_scc +// GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +v_prng_b32 v255, 0xaf123456 +// GFX950: v_prng_b32_e32 v255, 
0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] +// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s new file mode 100644 index 00000000000000..301750689bc782 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s @@ -0,0 +1,31 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefixes=GFX950 %s + +v_prng_b32 v5, v1 quad_perm:[3,2,1,0] +// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] + +v_prng_b32 v5, v1 quad_perm:[0,1,2,3] +// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff] + +v_prng_b32 v5, v1 row_mirror +// GFX950: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff] + +v_prng_b32 v5, v1 row_half_mirror +// GFX950: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff] + +v_prng_b32 v5, v1 row_shl:1 +// GFX950: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff] + +v_prng_b32 v5, v1 row_shl:15 +// GFX950: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff] + +v_prng_b32 v5, v1 row_shr:1 +// GFX950: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff] + +v_prng_b32 v5, v1 row_shr:15 +// GFX950: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff] + +v_prng_b32 v5, v1 row_ror:1 +// GFX950: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff] + +v_prng_b32 v5, v1 row_ror:15 +// GFX950: 
v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt new file mode 100644 index 00000000000000..91ab05e99f1e7c --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt @@ -0,0 +1,43 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX950 %s + +# GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] +0x01,0xb1,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e] +0xff,0xb1,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e] +0x01,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e] +0x65,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e] +0x6a,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e] +0x6b,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e] +0x7b,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e] +0x7c,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e] +0x7e,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e] +0x7f,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] +0xc1,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] +0xf0,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] +0xfd,0xb0,0x0a,0x7e + +# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] +0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf \ No newline at end of file diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll 
b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 779def76fc58d3..5fdb918c875459 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -6547,3 +6547,21 @@ define half @test_constant_fold_exp2_f16_neg_denorm() { %val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff) ret half %val } + +; -------------------------------------------------------------------- +; llvm.amdgcn.prng +; -------------------------------------------------------------------- +declare i32 @llvm.amdgcn.prng.b32(i32) +define i32 @prng_undef_i32() { +; CHECK-LABEL: @prng_undef_i32( +; CHECK-NEXT: ret i32 undef + %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef) + ret i32 %prng +} + +define i32 @prng_poison_i32() { +; CHECK-LABEL: @prng_poison_i32( +; CHECK-NEXT: ret i32 poison + %prng = call i32 @llvm.amdgcn.prng.b32(i32 poison) + ret i32 %prng +}