llvm/llvm/test/CodeGen/NVPTX/rotate.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s
; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s
; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}


declare i32 @llvm.nvvm.rotate.b32(i32, i32)
declare i64 @llvm.nvvm.rotate.b64(i64, i32)
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)

; SM20: rotate32
; SM35: rotate32
define i32 @rotate32(i32 %a, i32 %b) {
; SM20-LABEL: rotate32(
; SM20:       {
; SM20-NEXT:    .reg .b32 %r<4>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u32 %r1, [rotate32_param_0];
; SM20-NEXT:    ld.param.u32 %r2, [rotate32_param_1];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b32 %lhs;
; SM20-NEXT:    .reg .b32 %rhs;
; SM20-NEXT:    .reg .b32 %amt2;
; SM20-NEXT:    shl.b32 %lhs, %r1, %r2;
; SM20-NEXT:    sub.s32 %amt2, 32, %r2;
; SM20-NEXT:    shr.b32 %rhs, %r1, %amt2;
; SM20-NEXT:    add.u32 %r3, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b32 [func_retval0+0], %r3;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotate32(
; SM35:       {
; SM35-NEXT:    .reg .b32 %r<4>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u32 %r1, [rotate32_param_0];
; SM35-NEXT:    ld.param.u32 %r2, [rotate32_param_1];
; SM35-NEXT:    shf.l.wrap.b32 %r3, %r1, %r1, %r2;
; SM35-NEXT:    st.param.b32 [func_retval0+0], %r3;
; SM35-NEXT:    ret;
  %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b)
  ret i32 %val
}

; SM20: rotate64
; SM35: rotate64
define i64 @rotate64(i64 %a, i32 %b) {
; SM20-LABEL: rotate64(
; SM20:       {
; SM20-NEXT:    .reg .b32 %r<2>;
; SM20-NEXT:    .reg .b64 %rd<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u64 %rd1, [rotate64_param_0];
; SM20-NEXT:    ld.param.u32 %r1, [rotate64_param_1];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b64 %lhs;
; SM20-NEXT:    .reg .b64 %rhs;
; SM20-NEXT:    .reg .u32 %amt2;
; SM20-NEXT:    and.b32 %amt2, %r1, 63;
; SM20-NEXT:    shl.b64 %lhs, %rd1, %amt2;
; SM20-NEXT:    sub.u32 %amt2, 64, %amt2;
; SM20-NEXT:    shr.b64 %rhs, %rd1, %amt2;
; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotate64(
; SM35:       {
; SM35-NEXT:    .reg .b32 %r<6>;
; SM35-NEXT:    .reg .b64 %rd<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u64 %rd1, [rotate64_param_0];
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b32 %dummy;
; SM35-NEXT:    mov.b64 {%dummy,%r1}, %rd1;
; SM35-NEXT:    }
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b32 %dummy;
; SM35-NEXT:    mov.b64 {%r2,%dummy}, %rd1;
; SM35-NEXT:    }
; SM35-NEXT:    ld.param.u32 %r3, [rotate64_param_1];
; SM35-NEXT:    shf.l.wrap.b32 %r4, %r2, %r1, %r3;
; SM35-NEXT:    shf.l.wrap.b32 %r5, %r1, %r2, %r3;
; SM35-NEXT:    mov.b64 %rd2, {%r5, %r4};
; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM35-NEXT:    ret;
  %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b)
  ret i64 %val
}

; SM20: rotateright64
; SM35: rotateright64
define i64 @rotateright64(i64 %a, i32 %b) {
; SM20-LABEL: rotateright64(
; SM20:       {
; SM20-NEXT:    .reg .b32 %r<2>;
; SM20-NEXT:    .reg .b64 %rd<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u64 %rd1, [rotateright64_param_0];
; SM20-NEXT:    ld.param.u32 %r1, [rotateright64_param_1];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b64 %lhs;
; SM20-NEXT:    .reg .b64 %rhs;
; SM20-NEXT:    .reg .u32 %amt2;
; SM20-NEXT:    and.b32 %amt2, %r1, 63;
; SM20-NEXT:    shr.b64 %lhs, %rd1, %amt2;
; SM20-NEXT:    sub.u32 %amt2, 64, %amt2;
; SM20-NEXT:    shl.b64 %rhs, %rd1, %amt2;
; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotateright64(
; SM35:       {
; SM35-NEXT:    .reg .b32 %r<6>;
; SM35-NEXT:    .reg .b64 %rd<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u64 %rd1, [rotateright64_param_0];
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b32 %dummy;
; SM35-NEXT:    mov.b64 {%r1,%dummy}, %rd1;
; SM35-NEXT:    }
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b32 %dummy;
; SM35-NEXT:    mov.b64 {%dummy,%r2}, %rd1;
; SM35-NEXT:    }
; SM35-NEXT:    ld.param.u32 %r3, [rotateright64_param_1];
; SM35-NEXT:    shf.r.wrap.b32 %r4, %r2, %r1, %r3;
; SM35-NEXT:    shf.r.wrap.b32 %r5, %r1, %r2, %r3;
; SM35-NEXT:    mov.b64 %rd2, {%r5, %r4};
; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM35-NEXT:    ret;
  %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
  ret i64 %val
}

; SM20: rotl0
; SM35: rotl0
define i32 @rotl0(i32 %x) {
; SM20-LABEL: rotl0(
; SM20:       {
; SM20-NEXT:    .reg .b32 %r<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u32 %r1, [rotl0_param_0];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b32 %lhs;
; SM20-NEXT:    .reg .b32 %rhs;
; SM20-NEXT:    shl.b32 %lhs, %r1, 8;
; SM20-NEXT:    shr.b32 %rhs, %r1, 24;
; SM20-NEXT:    add.u32 %r2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b32 [func_retval0+0], %r2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotl0(
; SM35:       {
; SM35-NEXT:    .reg .b32 %r<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u32 %r1, [rotl0_param_0];
; SM35-NEXT:    shf.l.wrap.b32 %r2, %r1, %r1, 8;
; SM35-NEXT:    st.param.b32 [func_retval0+0], %r2;
; SM35-NEXT:    ret;
  %t0 = shl i32 %x, 8
  %t1 = lshr i32 %x, 24
  %t2 = or i32 %t0, %t1
  ret i32 %t2
}

declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i64 @llvm.fshr.i64(i64, i64, i64)

; SM35: rotl64
define i64 @rotl64(i64 %a, i64 %n) {
; SM20-LABEL: rotl64(
; SM20:       {
; SM20-NEXT:    .reg .b32 %r<2>;
; SM20-NEXT:    .reg .b64 %rd<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u64 %rd1, [rotl64_param_0];
; SM20-NEXT:    ld.param.u32 %r1, [rotl64_param_1];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b64 %lhs;
; SM20-NEXT:    .reg .b64 %rhs;
; SM20-NEXT:    .reg .u32 %amt2;
; SM20-NEXT:    and.b32 %amt2, %r1, 63;
; SM20-NEXT:    shl.b64 %lhs, %rd1, %amt2;
; SM20-NEXT:    sub.u32 %amt2, 64, %amt2;
; SM20-NEXT:    shr.b64 %rhs, %rd1, %amt2;
; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotl64(
; SM35:       {
; SM35-NEXT:    .reg .b32 %r<2>;
; SM35-NEXT:    .reg .b64 %rd<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u64 %rd1, [rotl64_param_0];
; SM35-NEXT:    ld.param.u32 %r1, [rotl64_param_1];
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b64 %lhs;
; SM35-NEXT:    .reg .b64 %rhs;
; SM35-NEXT:    .reg .u32 %amt2;
; SM35-NEXT:    and.b32 %amt2, %r1, 63;
; SM35-NEXT:    shl.b64 %lhs, %rd1, %amt2;
; SM35-NEXT:    sub.u32 %amt2, 64, %amt2;
; SM35-NEXT:    shr.b64 %rhs, %rd1, %amt2;
; SM35-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM35-NEXT:    }
; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM35-NEXT:    ret;
  %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n)
  ret i64 %val
}

; SM35: rotl64_imm
define i64 @rotl64_imm(i64 %a) {
; SM20-LABEL: rotl64_imm(
; SM20:       {
; SM20-NEXT:    .reg .b64 %rd<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u64 %rd1, [rotl64_imm_param_0];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b64 %lhs;
; SM20-NEXT:    .reg .b64 %rhs;
; SM20-NEXT:    shl.b64 %lhs, %rd1, 2;
; SM20-NEXT:    shr.b64 %rhs, %rd1, 62;
; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotl64_imm(
; SM35:       {
; SM35-NEXT:    .reg .b64 %rd<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u64 %rd1, [rotl64_imm_param_0];
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b64 %lhs;
; SM35-NEXT:    .reg .b64 %rhs;
; SM35-NEXT:    shl.b64 %lhs, %rd1, 2;
; SM35-NEXT:    shr.b64 %rhs, %rd1, 62;
; SM35-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM35-NEXT:    }
; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM35-NEXT:    ret;
  %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66)
  ret i64 %val
}

; SM35: rotr64
define i64 @rotr64(i64 %a, i64 %n) {
; SM20-LABEL: rotr64(
; SM20:       {
; SM20-NEXT:    .reg .b32 %r<2>;
; SM20-NEXT:    .reg .b64 %rd<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u64 %rd1, [rotr64_param_0];
; SM20-NEXT:    ld.param.u32 %r1, [rotr64_param_1];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b64 %lhs;
; SM20-NEXT:    .reg .b64 %rhs;
; SM20-NEXT:    .reg .u32 %amt2;
; SM20-NEXT:    and.b32 %amt2, %r1, 63;
; SM20-NEXT:    shr.b64 %lhs, %rd1, %amt2;
; SM20-NEXT:    sub.u32 %amt2, 64, %amt2;
; SM20-NEXT:    shl.b64 %rhs, %rd1, %amt2;
; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotr64(
; SM35:       {
; SM35-NEXT:    .reg .b32 %r<2>;
; SM35-NEXT:    .reg .b64 %rd<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u64 %rd1, [rotr64_param_0];
; SM35-NEXT:    ld.param.u32 %r1, [rotr64_param_1];
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b64 %lhs;
; SM35-NEXT:    .reg .b64 %rhs;
; SM35-NEXT:    .reg .u32 %amt2;
; SM35-NEXT:    and.b32 %amt2, %r1, 63;
; SM35-NEXT:    shr.b64 %lhs, %rd1, %amt2;
; SM35-NEXT:    sub.u32 %amt2, 64, %amt2;
; SM35-NEXT:    shl.b64 %rhs, %rd1, %amt2;
; SM35-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM35-NEXT:    }
; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM35-NEXT:    ret;
  %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n)
  ret i64 %val
}

; SM35: rotr64_imm
define i64 @rotr64_imm(i64 %a) {
; SM20-LABEL: rotr64_imm(
; SM20:       {
; SM20-NEXT:    .reg .b64 %rd<3>;
; SM20-EMPTY:
; SM20-NEXT:  // %bb.0:
; SM20-NEXT:    ld.param.u64 %rd1, [rotr64_imm_param_0];
; SM20-NEXT:    {
; SM20-NEXT:    .reg .b64 %lhs;
; SM20-NEXT:    .reg .b64 %rhs;
; SM20-NEXT:    shl.b64 %lhs, %rd1, 62;
; SM20-NEXT:    shr.b64 %rhs, %rd1, 2;
; SM20-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM20-NEXT:    }
; SM20-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM20-NEXT:    ret;
;
; SM35-LABEL: rotr64_imm(
; SM35:       {
; SM35-NEXT:    .reg .b64 %rd<3>;
; SM35-EMPTY:
; SM35-NEXT:  // %bb.0:
; SM35-NEXT:    ld.param.u64 %rd1, [rotr64_imm_param_0];
; SM35-NEXT:    {
; SM35-NEXT:    .reg .b64 %lhs;
; SM35-NEXT:    .reg .b64 %rhs;
; SM35-NEXT:    shl.b64 %lhs, %rd1, 62;
; SM35-NEXT:    shr.b64 %rhs, %rd1, 2;
; SM35-NEXT:    add.u64 %rd2, %lhs, %rhs;
; SM35-NEXT:    }
; SM35-NEXT:    st.param.b64 [func_retval0+0], %rd2;
; SM35-NEXT:    ret;
  %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)
  ret i64 %val
}