; llvm/llvm/test/CodeGen/AMDGPU/set-wave-priority.ll

; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | \
; RUN:   FileCheck %s

; The function body is a single fadd with no buffer load. The directives
; below require that no s_setprio appears before the shader epilog marker.
; CHECK-LABEL: no_setprio:
; CHECK-NOT:       s_setprio
; CHECK:           ; return to shader part epilog
define amdgpu_ps <2 x float> @no_setprio(<2 x float> %a, <2 x float> %b) "amdgpu-wave-priority-threshold"="1" {
  %s = fadd <2 x float> %a, %b
  ret <2 x float> %s
}

; Single-block function: one buffer load followed by an fadd that uses it.
; Expected output: s_setprio 3 somewhere before the load, and s_setprio 0 on
; the line immediately after the load.
; CHECK-LABEL: vmem_in_exit_block:
; CHECK:           s_setprio 3
; CHECK:           buffer_load_dwordx2
; CHECK-NEXT:      s_setprio 0
; CHECK:           ; return to shader part epilog
define amdgpu_ps <2 x float> @vmem_in_exit_block(ptr addrspace(8) inreg %p, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %s = fadd <2 x float> %v, %x
  ret <2 x float> %s
}

; The entry block branches either to %a (returns a zero vector, no load) or
; to %b (buffer load + fadd).
; Expected output: s_setprio 3 before the conditional branch; in %b,
; s_setprio 0 on the line right after the load; in %a, s_setprio 0 as the
; first line of the block.
; CHECK-LABEL: branch:
; CHECK:           s_setprio 3
; CHECK:           s_cbranch_scc0 [[A:.*]]
; CHECK:       {{.*}}:  ; %b
; CHECK:           buffer_load_dwordx2
; CHECK-NEXT:      s_setprio 0
; CHECK:           s_branch [[EXIT:.*]]
; CHECK:       [[A]]:  ; %a
; CHECK-NEXT:      s_setprio 0
; CHECK:           s_branch [[EXIT]]
; CHECK-NEXT:  [[EXIT]]:
define amdgpu_ps <2 x float> @branch(ptr addrspace(8) inreg %p, i32 inreg %i, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

; No-load path: returns a constant.
a:
  ret <2 x float> <float 0.0, float 0.0>

; Load path: the only buffer load in the function.
b:
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %s = fadd <2 x float> %v, %x
  ret <2 x float> %s
}

; The entry block loads and then branches to %a or %c; %a performs a second
; load and branches to %b or %c.
; Expected output: s_setprio 3 before the first load; in %a, s_setprio 0 on
; the line right after the second load; no s_setprio in %b; and s_setprio 0
; as the first line of %c, which is reachable from both the entry block and
; %a.
; CHECK-LABEL: setprio_follows_setprio:
; CHECK:           s_setprio 3
; CHECK:           buffer_load_dwordx2
; CHECK:           s_cbranch_scc1 [[C:.*]]
; CHECK:       {{.*}}:  ; %a
; CHECK:           buffer_load_dwordx2
; CHECK-NEXT:      s_setprio 0
; CHECK:           s_cbranch_vccnz [[C]]
; CHECK:       {{.*}}:  ; %b
; CHECK-NOT:       s_setprio
; CHECK:           s_branch [[EXIT:.*]]
; CHECK:       [[C]]:  ; %c
; CHECK-NEXT:      s_setprio 0
; CHECK:           s_branch [[EXIT]]
; CHECK:       [[EXIT]]:
define amdgpu_ps <2 x float> @setprio_follows_setprio(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="3" {
entry:
  %v1 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %cond1 = icmp ne i32 %i, 0
  br i1 %cond1, label %a, label %c

; Second load; its two lanes are compared to pick the successor.
a:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
  %v20 = extractelement <2 x float> %v2, i32 0
  %v21 = extractelement <2 x float> %v2, i32 1
  %cond2 = fcmp ult float %v20, %v21
  br i1 %cond2, label %b, label %c

; Returns the second load's value unchanged.
b:
  ret <2 x float> %v2

; Uses only the first load's value.
c:
  %v4 = fadd <2 x float> %v1, %v1
  ret <2 x float> %v4
}

; A counted loop (%i2 runs while it is below 5) whose body does a buffer
; load and accumulates the result.
; Expected output: s_setprio 3 in the entry block, no s_setprio anywhere
; between that and the loop's back-branch, and s_setprio 0 as the first line
; of %exit.
; CHECK-LABEL: loop:
; CHECK:       {{.*}}:  ; %entry
; CHECK:           s_setprio 3
; CHECK-NOT:       s_setprio
; CHECK:       [[LOOP:.*]]:  ; %loop
; CHECK-NOT:       s_setprio
; CHECK:           buffer_load_dwordx2
; CHECK-NOT:       s_setprio
; CHECK:           s_cbranch_scc1 [[LOOP]]
; CHECK-NEXT:  {{.*}}:  ; %exit
; CHECK-NEXT:      s_setprio 0
define amdgpu_ps <2 x float> @loop(ptr addrspace(8) inreg %p) "amdgpu-wave-priority-threshold"="2" {
entry:
  br label %loop

loop:
  ; Induction variable and running sum carried around the back edge.
  %i = phi i32 [0, %entry], [%i2, %loop]
  %sum = phi <2 x float> [<float 0.0, float 0.0>, %entry], [%sum2, %loop]

  %i2 = add i32 %i, 1

  ; The load takes the induction variable as an operand.
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 %i, i32 0, i32 0, i32 0)
  %sum2 = fadd <2 x float> %sum, %v

  %cond = icmp ult i32 %i2, 5
  br i1 %cond, label %loop, label %exit

exit:
  ret <2 x float> %sum2
}

; The entry block loads and then branches either into a loop that contains no
; loads or to %another_load, which performs a second load.
; Expected output: s_setprio 3 before the first load and no further s_setprio
; before the conditional branch; s_setprio 0 as the first line of
; %loop.preheader (the block on the entry-to-loop edge), so that neither the
; loop body nor %exit contains an s_setprio; and, in %another_load,
; s_setprio 0 on the line right after the second load.
; CHECK-LABEL: edge_split:
; CHECK:           s_setprio 3
; CHECK:           buffer_load_dwordx2
; CHECK-NOT:       s_setprio
; CHECK:           s_cbranch_scc1 [[ANOTHER_LOAD:.*]]
; CHECK:       {{.*}}:  ; %loop.preheader
; CHECK-NEXT:      s_setprio 0
; CHECK:       [[LOOP:.*]]:  ; %loop
; CHECK-NOT:       s_setprio
; CHECK:           s_cbranch_scc1 [[LOOP]]
; CHECK:       {{.*}}:  ; %exit
; CHECK-NOT:       s_setprio
; CHECK:           s_branch [[RET:.*]]
; CHECK:       [[ANOTHER_LOAD]]:  ; %another_load
; CHECK:           buffer_load_dwordx2
; CHECK-NEXT:      s_setprio 0
; CHECK:           s_branch [[RET]]
; CHECK:       [[RET]]:
define amdgpu_ps <2 x float> @edge_split(ptr addrspace(8) inreg %p, i32 inreg %x) "amdgpu-wave-priority-threshold"="2" {
entry:
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %cond = icmp ne i32 %x, 0
  br i1 %cond, label %loop, label %another_load

; Load-free loop: repeatedly multiplies by the value loaded in the entry block.
loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %mul = phi <2 x float> [%v, %entry], [%mul2, %loop]

  %i2 = add i32 %i, 1
  %mul2 = fmul <2 x float> %mul, %v

  %cond2 = icmp ult i32 %i2, 5
  br i1 %cond2, label %loop, label %exit

exit:
  ret <2 x float> %mul2

; Alternative path with a second buffer load.
another_load:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
  %sum = fadd <2 x float> %v, %v2
  ret <2 x float> %sum
}

; Threshold attribute is "4". The entry block loads and then performs two
; <2 x float> fadds, which the directives expect as four v_add_f32_e32
; instructions.
; Expected output: s_setprio 3 before the first load, s_setprio 0 on the line
; right after it, then the four adds; in %b the second load is expected on
; the line directly after the block label.
; The sibling test valu_insts_threshold2 uses the same IR with threshold "5"
; and expects no s_setprio at all.
; CHECK-LABEL: valu_insts_threshold:
; CHECK:           s_setprio 3
; CHECK:           buffer_load_dwordx2
; CHECK-NEXT:      s_setprio 0
; CHECK-COUNT-4:   v_add_f32_e32
; CHECK:           s_cbranch_scc0 [[A:.*]]
; CHECK:       {{.*}}:  ; %b
; CHECK-NEXT:      buffer_load_dwordx2
; CHECK:           s_branch [[END:.*]]
; CHECK:       [[A]]:  ; %a
; CHECK:           s_branch [[END]]
; CHECK:       [[END]]:
define amdgpu_ps <2 x float> @valu_insts_threshold(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="4" {
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %add = fadd <2 x float> %v, %v
  %add2 = fadd <2 x float> %add, %add

  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

; No further load on this path.
a:
  ret <2 x float> %add2

; Second load on this path.
b:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 1, i32 0, i32 0)
  %sub = fsub <2 x float> %add2, %v2
  ret <2 x float> %sub
}

; Same IR as valu_insts_threshold, but with the threshold attribute raised
; to "5". The directives require that no s_setprio is emitted anywhere in the
; function.
; CHECK-LABEL: valu_insts_threshold2:
; CHECK-NOT: s_setprio
; CHECK: ; -- End function
define amdgpu_ps <2 x float> @valu_insts_threshold2(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="5" {
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %add = fadd <2 x float> %v, %v
  %add2 = fadd <2 x float> %add, %add

  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

; No further load on this path.
a:
  ret <2 x float> %add2

; Second load on this path.
b:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 1, i32 0, i32 0)
  %sub = fsub <2 x float> %add2, %v2
  ret <2 x float> %sub
}

; Buffer-load intrinsic called by the tests in this file; every test that
; expects a buffer_load_dwordx2 in the output obtains it through this call.
declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) nounwind