; llvm/llvm/test/CodeGen/WebAssembly/simd-bitmask.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that the SIMD bitmask instructions can be selected when a vector
;; comparison result (<N x i1>) is bitcast to an integer.

;; Every function in this file is compiled for this triple; the RUN line does
;; not pass a triple of its own.
target triple = "wasm32-unknown-unknown"

;; Zero-compare all sixteen i8 lanes and pack the <16 x i1> result into an i16.
;; Expected lowering: i8x16.eq against an all-zero constant, then a single
;; i8x16.bitmask.
define i16 @bitmask_v16i8(<16 x i8> %v) {
; CHECK-LABEL: bitmask_v16i8:
; CHECK:         .functype bitmask_v16i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i8x16.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <16 x i8> %v, zeroinitializer
  %packed = bitcast <16 x i1> %is_zero to i16
  ret i16 %packed
}

;; Zero-compare all eight i16 lanes and pack the <8 x i1> result into an i8.
;; Expected lowering: i16x8.eq against an all-zero constant, then a single
;; i16x8.bitmask.
define i8 @bitmask_v8i16(<8 x i16> %v) {
; CHECK-LABEL: bitmask_v8i16:
; CHECK:         .functype bitmask_v8i16 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i16x8.eq
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <8 x i16> %v, zeroinitializer
  %packed = bitcast <8 x i1> %is_zero to i8
  ret i8 %packed
}

;; Zero-compare all four i32 lanes, pack the <4 x i1> result into an i4 and
;; zero-extend it to the i8 return type.
;; Expected lowering: i32x4.eq against an all-zero constant, then a single
;; i32x4.bitmask; the expected output lists no separate instruction for the
;; zero-extension.
define i8 @bitmask_v4i32(<4 x i32> %v) {
; CHECK-LABEL: bitmask_v4i32:
; CHECK:         .functype bitmask_v4i32 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0
; CHECK-NEXT:    i32x4.eq
; CHECK-NEXT:    i32x4.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <4 x i32> %v, zeroinitializer
  %packed = bitcast <4 x i1> %is_zero to i4
  %widened = zext i4 %packed to i8
  ret i8 %widened
}

;; Zero-compare both i64 lanes, pack the <2 x i1> result into an i2 and
;; zero-extend it to the i8 return type.
;; Expected lowering: i64x2.eq against an all-zero constant, then a single
;; i64x2.bitmask; the expected output lists no separate instruction for the
;; zero-extension.
define i8 @bitmask_v2i64(<2 x i64> %v) {
; CHECK-LABEL: bitmask_v2i64:
; CHECK:         .functype bitmask_v2i64 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0
; CHECK-NEXT:    i64x2.eq
; CHECK-NEXT:    i64x2.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <2 x i64> %v, zeroinitializer
  %packed = bitcast <2 x i1> %is_zero to i2
  %widened = zext i2 %packed to i8
  ret i8 %widened
}

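;; Test unusual vector types: lane counts and widths that are not a full
;; 128-bit vector (<1 x i8>, <7 x i8>, <8 x i8>) or exceed it (<32 x i8>).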

;; Single-lane case: zero-compare a <1 x i8> and bitcast the <1 x i1> to i1.
;; Expected lowering uses no bitmask instruction: lane 0 is extracted
;; (unsigned) and tested with i32.eqz.
define i1 @bitmask_v1i8(<1 x i8> %v) {
; CHECK-LABEL: bitmask_v1i8:
; CHECK:         .functype bitmask_v1i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <1 x i8> %v, zeroinitializer
  %bit = bitcast <1 x i1> %is_zero to i1
  ret i1 %bit
}

;; Seven-lane case: zero-compare a <7 x i8> and bitcast the <7 x i1> to i7.
;; Per the expected output below:
;;   - the seven lanes arrive as separate i32 parameters and are assembled
;;     into a vector with i8x16.splat followed by i8x16.replace_lane 1..6;
;;   - the vector is compared with an all-zero constant using i8x16.eq;
;;   - no bitmask instruction is used; lanes 0..6 are extracted one at a
;;     time, shifted left by their lane index and combined with i32.or;
;;   - lanes 0..5 are each masked with 1 after extraction, lane 6 is not, and
;;     the combined value is finally masked with 127 (the low seven bits);
;;   - a stack-pointer-minus-16 value is computed at function entry and
;;     immediately dropped.
define i7 @bitmask_v7i8(<7 x i8> %v) {
; CHECK-LABEL: bitmask_v7i8:
; CHECK:         .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    global.get __stack_pointer
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.sub
; CHECK-NEXT:    drop
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.splat
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.replace_lane 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.replace_lane 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    i8x16.replace_lane 3
; CHECK-NEXT:    local.get 4
; CHECK-NEXT:    i8x16.replace_lane 4
; CHECK-NEXT:    local.get 5
; CHECK-NEXT:    i8x16.replace_lane 5
; CHECK-NEXT:    local.get 6
; CHECK-NEXT:    i8x16.replace_lane 6
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 7
; CHECK-NEXT:    i16x8.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    local.tee 7
; CHECK-NEXT:    i16x8.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 3
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 5
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 7
; CHECK-NEXT:    i16x8.extract_lane_u 6
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.const 127
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <7 x i8> %v, zeroinitializer
  %bitmask = bitcast <7 x i1> %cmp to i7
  ret i7 %bitmask
}

;; Half-width case: zero-compare a <8 x i8> and pack the <8 x i1> into an i8.
;; Expected lowering: i8x16.eq against an all-zero constant, a sign-extending
;; widen of the low eight lanes to i16x8, then a single i16x8.bitmask.
define i8 @bitmask_v8i8(<8 x i8> %v) {
; CHECK-LABEL: bitmask_v8i8:
; CHECK:         .functype bitmask_v8i8 (v128) -> (i32)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i16x8.bitmask
; CHECK-NEXT:    # fallthrough-return
  %is_zero = icmp eq <8 x i8> %v, zeroinitializer
  %packed = bitcast <8 x i1> %is_zero to i8
  ret i8 %packed
}

;; Double-width case: zero-compare a <32 x i8> and bitcast the <32 x i1> to
;; i32. Per the expected output below:
;;   - the input arrives as two v128 parameters (locals 0 and 1);
;;   - the all-zero constant is kept in local 2 and reused for both i8x16.eq
;;     comparisons;
;;   - no bitmask instruction is used; every lane is extracted individually;
;;   - first half (parameter 0): lanes 0..15 are shifted left by their lane
;;     index and OR-ed together as they are produced; lanes 0..14 are masked
;;     with 1, lane 15 is not, and the partial result is masked with 65535;
;;   - second half (parameter 1): lanes are extracted from 15 down to 0 and
;;     shifted left by 31 down to 16; lanes 14..0 are masked with 1, lane 15
;;     is not; the sixteen values are then merged into the result by a
;;     trailing run of sixteen i32.or instructions;
;;   - a stack-pointer-minus-16 value is computed at function entry and
;;     immediately dropped.
define i32 @bitmask_v32i8(<32 x i8> %v) {
; CHECK-LABEL: bitmask_v32i8:
; CHECK:         .functype bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT:    .local v128
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    global.get __stack_pointer
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.sub
; CHECK-NEXT:    drop
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 3
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 5
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 6
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 7
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 8
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 9
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 9
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 10
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 10
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 11
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 11
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 12
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 13
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 13
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 14
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 14
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 15
; CHECK-NEXT:    i32.const 15
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.const 65535
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i8x16.eq
; CHECK-NEXT:    local.tee 0
; CHECK-NEXT:    i8x16.extract_lane_u 15
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 14
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 30
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 13
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 29
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 12
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 28
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 11
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 27
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 10
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 26
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 9
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 25
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 8
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 7
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 23
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 6
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 22
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 5
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 21
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 4
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 20
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 3
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 19
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 2
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 18
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 17
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.shl
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    i32.or
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp eq <32 x i8> %v, zeroinitializer
  %bitmask = bitcast <32 x i1> %cmp to i32
  ret i32 %bitmask
}