; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s
; Opaque external functions. Each arm of the loops below calls a different
; one, which keeps the three paths through the loop body distinguishable.
declare i32 @a()
declare i32 @b()
declare i32 @c()
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. The conditions are divergent,
; so they should not be unswitched and the loop is expected to stay as is.
define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1(
; Expected result: the function is emitted unchanged. Both branches on the
; loop-invariant arguments %cond1 and %cond2 must remain inside the loop.
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
; Loop header: first invariant branch, on %cond1.
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
; Taken when %cond1 is true.
loop_a:
%unused.a = call i32 @a()
br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
; Taken when %cond1 is false: second invariant branch, on %cond2.
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
; All three arms merge here. The back edge is taken while the i1 reloaded
; from %ptr on each iteration is true; this is the loop's only exit test.
latch:
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. The conditions are known to
; be uniform, so the loop should be unswitchable. However, unswitch
; currently does not make use of UniformityAnalysis, so the loop is
; expected to be left unchanged.
; Same loop body as @test1, but declared with the amdgpu_kernel calling
; convention. NOTE(review): presumably this is what makes %cond1 and %cond2
; uniform (kernel arguments) -- confirm against the AMDGPU documentation.
define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1_uniform(
; Expected result: identical to the input. Neither invariant branch is
; hoisted out of the loop.
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
; Loop header: first invariant branch, on %cond1.
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
%unused.a = call i32 @a()
br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
; Reached when %cond1 is false: second invariant branch, on %cond2.
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
; Common latch: loops again while the i1 loaded from %ptr is true.
latch:
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. There is no divergence
; because the function is assumed to execute only with a workgroup of
; size 1 (attribute group #0), so both conditions are expected to be
; unswitched.
; Same loop shape as the other tests in this file, but carrying attribute
; group #0. Here the expectations require both invariant branches to be
; hoisted in front of the loop, leaving three specialised loop copies:
;   * %cond1 true                  -> the ".us" copy, which only calls @a
;   * %cond1 false, %cond2 true    -> the ".us1"/".us2" copy, which only calls @b
;   * %cond1 false, %cond2 false   -> the original blocks, which only call @c
; The expectations are interleaved with the input blocks for readability;
; they describe the pass output, not the input line that precedes them.
define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
; CHECK-LABEL: @test1_single_lane_execution(
entry:
br label %loop_begin
; The branch on %cond1 is expected to move into the entry block.
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
loop_a:
call i32 @a()
br label %latch
; The 'loop_a' unswitched loop.
; Every branch inside this copy is unconditional except the latch's exit test.
;
; CHECK: entry.split.us:
; CHECK-NEXT: br label %loop_begin.us
;
; CHECK: loop_begin.us:
; CHECK-NEXT: br label %loop_a.us
;
; CHECK: loop_a.us:
; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch.us
;
; The name of the loaded value is captured by a pattern instead of being
; spelled out, so the match does not depend on how the clone is named.
; CHECK: latch.us:
; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK: loop_exit.split.us:
; CHECK-NEXT: br label %loop_exit
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
; The second unswitched condition.
; It is tested once, on the path where %cond1 was false.
;
; CHECK: entry.split:
; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
loop_b_a:
call i32 @b()
br label %latch
; The 'loop_b_a' unswitched loop.
;
; CHECK: entry.split.split.us:
; CHECK-NEXT: br label %loop_begin.us1
;
; CHECK: loop_begin.us1:
; CHECK-NEXT: br label %loop_b.us
;
; CHECK: loop_b.us:
; CHECK-NEXT: br label %loop_b_a.us
;
; CHECK: loop_b_a.us:
; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch.us2
;
; CHECK: latch.us2:
; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
;
; CHECK: loop_exit.split.split.us:
; CHECK-NEXT: br label %loop_exit.split
loop_b_b:
call i32 @c()
br label %latch
; The 'loop_b_b' unswitched loop.
; This copy keeps the original block names.
;
; CHECK: entry.split.split:
; CHECK-NEXT: br label %loop_begin
;
; CHECK: loop_begin:
; CHECK-NEXT: br label %loop_b
;
; CHECK: loop_b:
; CHECK-NEXT: br label %loop_b_b
;
; CHECK: loop_b_b:
; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %latch
;
; CHECK: latch:
; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
;
; CHECK: loop_exit.split.split:
; CHECK-NEXT: br label %loop_exit.split
; Input latch: loops again while the i1 loaded from %ptr is true.
latch:
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
; The exits of the two %cond2 copies merge in loop_exit.split, which then
; joins the exit of the 'loop_a' copy at loop_exit.
; CHECK: loop_exit.split:
; CHECK-NEXT: br label %loop_exit
;
; CHECK: loop_exit:
; CHECK-NEXT: ret
}
; Attribute group #0, used by @test1_single_lane_execution: sets the
; "amdgpu-flat-work-group-size" string attribute to "1,1", which that test's
; header comment describes as a workgroup of size 1.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }