llvm/llvm/test/tools/llvm-split/AMDGPU/kernels-load-balancing.ll

; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

; Test load balancing logic with 6 kernels.
;
; Kernels go from most expensive (A == 6) to least expensive (F == 1)
;
; Load balancing should work like this (current partition cost is in parens)
;
; Initial    -> [P0(0), P1(0), P2(0)]
;
; A(6) goes in 2 -> [P2(6), P0(0), P1(0)]
; B(5) goes in 1 -> [P2(6), P1(5), P0(0)]
; C(4) goes in 0 -> [P2(6), P1(5), P0(4)]
;
; D(3) goes in 0 -> [P0(7), P2(6), P1(5)]
; E(2) goes in 1 -> [P0(7), P1(7), P2(6)]
; F(1) goes in 2 -> [P0(7), P1(7), P2(7)]

; CHECK0-NOT: define
; CHECK0: define amdgpu_kernel void @C
; CHECK0: define amdgpu_kernel void @D
; CHECK0-NOT: define

; CHECK1-NOT: define
; CHECK1: define amdgpu_kernel void @B
; CHECK1: define amdgpu_kernel void @E
; CHECK1-NOT: define

; CHECK2-NOT: define
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @F
; CHECK2-NOT: define


; Kernel A: five i64 stores to %x followed by ret. The walkthrough above rates
; it the most expensive kernel (cost 6) and places it first, in partition 2,
; where the checks for the third output expect it alongside @F.
; Keep the instruction count unchanged: it is what makes A the costliest kernel.
define amdgpu_kernel void @A(ptr %x) {
  store i64 42, ptr %x
  store i64 43, ptr %x
  store i64 44, ptr %x
  store i64 45, ptr %x
  store i64 46, ptr %x
  ret void
}

; Kernel B: four i64 stores to %x followed by ret. Rated cost 5 in the
; walkthrough above; expected in partition 1 together with @E.
define amdgpu_kernel void @B(ptr %x) {
  store i64 42, ptr %x
  store i64 43, ptr %x
  store i64 44, ptr %x
  store i64 45, ptr %x
  ret void
}

; Kernel C: three i64 stores to %x followed by ret. Rated cost 4 in the
; walkthrough above; expected in partition 0 together with @D.
define amdgpu_kernel void @C(ptr %x) {
  store i64 42, ptr %x
  store i64 43, ptr %x
  store i64 44, ptr %x
  ret void
}

; Kernel D: two i64 stores to %x followed by ret. Rated cost 3 in the
; walkthrough above; it joins @C in partition 0, the cheapest partition once
; A, B and C have been placed.
define amdgpu_kernel void @D(ptr %x) {
  store i64 42, ptr %x
  store i64 43, ptr %x
  ret void
}

; Kernel E: a single i64 store to %x followed by ret. Rated cost 2 in the
; walkthrough above; expected in partition 1 together with @B.
define amdgpu_kernel void @E(ptr %x) {
  store i64 42, ptr %x
  ret void
}

; Kernel F: takes no arguments and only returns. Rated the cheapest kernel
; (cost 1) in the walkthrough above; expected in partition 2 together with @A.
define amdgpu_kernel void @F() {
  ret void
}