llvm/test/CodeGen/X86/combine-bswap.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; These tests check that bswap-related DAG combines fire as expected: bswap of
; bswap, bswap of shifted bswaps, bswaps of logic ops, and demanded-bits folds.

declare i16 @llvm.bswap.i16(i16) readnone
declare i32 @llvm.bswap.i32(i32) readnone
declare i64 @llvm.bswap.i64(i64) readnone
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) readnone

; fold (bswap undef) -> undef
define i32 @test_undef() nounwind {
; X86-LABEL: test_undef:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: test_undef:
; X64:       # %bb.0:
; X64-NEXT:    retq
  %b = call i32 @llvm.bswap.i32(i32 undef)
  ret i32 %b
}

; fold (bswap (bswap x)) -> x
define i32 @test_bswap_bswap(i32 %a0) nounwind {
; X86-LABEL: test_bswap_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bswap.i32(i32 %a0)
  %c = call i32 @llvm.bswap.i32(i32 %b)
  ret i32 %c
}

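; fold (bswap (srl (bswap x), 8)) -> (shl x, 8)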
define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
; X86-LABEL: test_bswap_srli_8_bswap_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_8_bswap_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
    %1 = call i16 @llvm.bswap.i16(i16 %a)
    %2 = lshr i16 %1, 8
    %3 = call i16 @llvm.bswap.i16(i16 %2)
    ret i16 %3
}

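; fold (bswap (srl (bswap x), 8)) -> (shl x, 8)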
define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
; X86-LABEL: test_bswap_srli_8_bswap_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_8_bswap_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    retq
    %1 = call i32 @llvm.bswap.i32(i32 %a)
    %2 = lshr i32 %1, 8
    %3 = call i32 @llvm.bswap.i32(i32 %2)
    ret i32 %3
}

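; fold (bswap (srl (bswap x), 16)) -> (shl x, 16)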
define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
; X86-LABEL: test_bswap_srli_16_bswap_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_16_bswap_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $16, %rax
; X64-NEXT:    retq
    %1 = call i64 @llvm.bswap.i64(i64 %a)
    %2 = lshr i64 %1, 16
    %3 = call i64 @llvm.bswap.i64(i64 %2)
    ret i64 %3
}

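; fold (bswap (shl (bswap x), 8)) -> (srl x, 8)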
define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
; X86-LABEL: test_bswap_shli_8_bswap_i16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_8_bswap_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
    %1 = call i16 @llvm.bswap.i16(i16 %a)
    %2 = shl i16 %1, 8
    %3 = call i16 @llvm.bswap.i16(i16 %2)
    ret i16 %3
}

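; fold (bswap (shl (bswap x), 8)) -> (srl x, 8)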
define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
; X86-LABEL: test_bswap_shli_8_bswap_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_8_bswap_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    retq
    %1 = call i32 @llvm.bswap.i32(i32 %a)
    %2 = shl i32 %1, 8
    %3 = call i32 @llvm.bswap.i32(i32 %2)
    ret i32 %3
}

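; fold (bswap (shl (bswap x), 16)) -> (srl x, 16)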
define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
; X86-LABEL: test_bswap_shli_16_bswap_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_16_bswap_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shrq $16, %rax
; X64-NEXT:    retq
    %1 = call i64 @llvm.bswap.i64(i64 %a)
    %2 = shl i64 %1, 16
    %3 = call i64 @llvm.bswap.i64(i64 %2)
    ret i64 %3
}

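; the 'or' only sets the top byte, which the bswap moves into the bits cleared
; by the final mask, so the 'or' folds away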
define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
; X86-LABEL: test_demandedbits_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT:    retl
;
; X64-LABEL: test_demandedbits_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
; X64-NEXT:    retq
  %b = or i32 %a0, 4278190080
  %c = call i32 @llvm.bswap.i32(i32 %b)
  %d = and i32 %c, 4294901760
  ret i32 %d
}

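; only one byte of %x is demanded, so the i64 load is narrowed to a byte load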
define void @demand_one_loaded_byte(ptr %xp, ptr %yp) {
; X86-LABEL: demand_one_loaded_byte:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl 4(%ecx), %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: demand_one_loaded_byte:
; X64:       # %bb.0:
; X64-NEXT:    movzbl 4(%rdi), %eax
; X64-NEXT:    movb %al, (%rsi)
; X64-NEXT:    retq
  %x = load i64, ptr %xp, align 8
  %x_zzzz7654 = lshr i64 %x, 32
  %x_z7654zzz = shl nuw nsw i64 %x_zzzz7654, 24
  %x_4zzz = trunc i64 %x_z7654zzz to i32
  %y = load i32, ptr %yp, align 4
  %y_321z = and i32 %y, -256
  %x_zzz4 = call i32 @llvm.bswap.i32(i32 %x_4zzz)
  %r = or i32 %x_zzz4, %y_321z
  store i32 %r, ptr %yp, align 4
  ret void
}

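; fold (bswap (shl (zext x), 48)) -> (zext (bswap x))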
define i64 @test_bswap64_shift48_zext(i16 %a0) {
; X86-LABEL: test_bswap64_shift48_zext:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48_zext:
; X64:       # %bb.0:
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    retq
  %z = zext i16 %a0 to i64
  %s = shl i64 %z, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

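; fold (bswap (shl x, 48)) -> (zext (bswap (trunc x)))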
define i64 @test_bswap64_shift48(i64 %a0) {
; X86-LABEL: test_bswap64_shift48:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48:
; X64:       # %bb.0:
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    retq
  %s = shl i64 %a0, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

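; negative test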
define i32 @test_bswap32_shift17(i32 %a0) {
; X86-LABEL: test_bswap32_shift17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $17, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap32_shift17:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $17, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    retq
  %s = shl i32 %a0, 17
  %b = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %b
}

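; fold (bswap (and (bswap a), b)) -> (and a, (bswap b))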
define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_lhs_bs32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_lhs_bs32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %2 = and i32 %1, %b
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

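; fold (bswap (or (bswap a), b)) -> (or a, (bswap b))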
define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
; X86-LABEL: bs_or_lhs_bs64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: bs_or_lhs_bs64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    retq
  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %2 = or i64 %1, %b
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

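; fold (bswap (xor a, (bswap b))) -> (xor (bswap a), b)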
define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
; X86-LABEL: bs_xor_rhs_bs64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: bs_xor_rhs_bs64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    xorq %rsi, %rax
; X64-NEXT:    retq
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = xor i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

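; fold (bswap (and (bswap a), (bswap b))) -> (and a, b), even though both
; inner bswaps have other uses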
define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_all_operand_multiuse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_all_operand_multiuse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    bswapl %esi
; X64-NEXT:    imull %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
  %3 = and i32 %1, %2
  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
  %5 = mul i32 %1, %4 ; increase use of left bswap
  %6 = mul i32 %2, %5 ; increase use of right bswap

  ret i32 %6
}

; negative test
define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_rhs_bs32_multiuse1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_rhs_bs32_multiuse1:
; X64:       # %bb.0:
; X64-NEXT:    bswapl %esi
; X64-NEXT:    andl %edi, %esi
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %1, %a
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = mul i32 %2, %3 ; increase use of logical op
  ret i32 %4
}

; negative test
define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_rhs_bs32_multiuse2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_rhs_bs32_multiuse2:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %esi
; X64-NEXT:    andl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %1, %a
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = mul i32 %1, %3 ; increase use of inner bswap
  ret i32 %4
}

; negative test
define i64 @test_bswap64_shift17(i64 %a0) {
; X86-LABEL: test_bswap64_shift17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $17, %edx, %eax
; X86-NEXT:    shll $17, %edx
; X86-NEXT:    bswapl %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift17:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $17, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    retq
  %s = shl i64 %a0, 17
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

; negative test
define i64 @test_bswap64_shift48_multiuse(i64 %a0, ptr %a1) {
; X86-LABEL: test_bswap64_shift48_multiuse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    movl %eax, 4(%ecx)
; X86-NEXT:    bswapl %eax
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48_multiuse:
; X64:       # %bb.0:
; X64-NEXT:    shlq $48, %rdi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    orq %rax, %rdi
; X64-NEXT:    movq %rdi, (%rsi)
; X64-NEXT:    retq
  %s = shl i64 %a0, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  %a = add i64 %s, %b
  store i64 %a, ptr %a1
  ret i64 %b
}

attributes #0 = { nounwind }