llvm/llvm/test/CodeGen/AArch64/rcpc3.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rcpc3 < %s | FileCheck --check-prefixes=BOTH,RCPC3 %s
; RUN: llc -mtriple=aarch64 -mattr=+v8.9a < %s | FileCheck --check-prefixes=BOTH,NO-RCPC3 %s

; Acquire-load an i64 from %a and insert it into lane 0 of the <2 x i64> %b.
; With +rcpc3 the expected output is a single ldap1 lane load; without it,
; an ldapr into a GPR followed by a lane move.
define hidden <2 x i64> @test_ldap1_2xi64_lane0(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    mov v0.d[0], x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic i64, ptr %a acquire, align 8
  %vec = insertelement <2 x i64> %b, i64 %loaded, i64 0
  ret <2 x i64> %vec
}

; Acquire-load an i64 from %a and insert it into lane 1 of the <2 x i64> %b.
; With +rcpc3 the expected output is a single ldap1 load into lane 1; without
; it, an ldapr into a GPR followed by a lane move.
define hidden <2 x i64> @test_ldap1_2xi64_lane1(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xi64_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xi64_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    mov v0.d[1], x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic i64, ptr %a acquire, align 8
  %vec = insertelement <2 x i64> %b, i64 %loaded, i64 1
  ret <2 x i64> %vec
}

; Acquire-load a double from %a and insert it into lane 0 of the
; <2 x double> %b. With +rcpc3 the expected output is a single ldap1 lane
; load; without it, the value goes through x8 and d1 before the lane move.
define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane0(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d1, x8
; NO-RCPC3-NEXT:    mov v0.d[0], v1.d[0]
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  %vec = insertelement <2 x double> %b, double %loaded, i64 0
  ret <2 x double> %vec
}

; Acquire-load a double from %a and insert it into lane 1 of the
; <2 x double> %b. With +rcpc3 the expected output is a single ldap1 load
; into lane 1; without it, the value goes through x8 and d1 before the lane
; move.
define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane1(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xdouble_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d1, x8
; NO-RCPC3-NEXT:    mov v0.d[1], v1.d[0]
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  %vec = insertelement <2 x double> %b, double %loaded, i64 1
  ret <2 x double> %vec
}

; Acquire-load an i64 from %a and build a <1 x i64> from it. The incoming %b
; is not used: the element is inserted into a poison vector. With +rcpc3 the
; expected output is an ldap1 lane load; without it, ldapr plus fmov into d0.
define hidden <1 x i64> @test_ldap1_1xi64_lane0(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_1xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_1xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d0, x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic i64, ptr %a acquire, align 8
  %vec = insertelement <1 x i64> poison, i64 %loaded, i64 0
  ret <1 x i64> %vec
}

; Acquire-load a double from %a and build a <1 x double> from it. The
; incoming %b is not used: the element is inserted into a poison vector.
; With +rcpc3 the expected output is an ldap1 lane load; without it, ldapr
; plus fmov into d0.
define hidden nofpclass(nan inf) <1 x double> @test_ldap1_1xdouble_lane0(ptr nocapture noundef readonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_1xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_1xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d0, x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  %vec = insertelement <1 x double> poison, double %loaded, i64 0
  ret <1 x double> %vec
}

; Extract lane 0 of the <2 x i64> %b and release-store it to %a.
; With +rcpc3 the expected output is a single stl1 lane store; without it,
; the lane is moved to x8 and stored with stlr.
define hidden void @test_stl1_2xi64_lane0(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %elt = extractelement <2 x i64> %b, i64 0
  store atomic i64 %elt, ptr %a release, align 8
  ret void
}

; Extract lane 1 of the <2 x i64> %b and release-store it to %a.
; With +rcpc3 the expected output is a single stl1 store of lane 1; without
; it, the lane is moved to x8 and stored with stlr.
define hidden void @test_stl1_2xi64_lane1(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xi64_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xi64_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    mov x8, v0.d[1]
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %elt = extractelement <2 x i64> %b, i64 1
  store atomic i64 %elt, ptr %a release, align 8
  ret void
}

; Extract lane 0 of the <2 x double> %b and release-store it to %a.
; With +rcpc3 the expected output is a single stl1 lane store; without it,
; the lane is moved to x8 and stored with stlr.
define hidden void @test_stl1_2xdouble_lane0(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %elt = extractelement <2 x double> %b, i64 0
  store atomic double %elt, ptr %a release, align 8
  ret void
}

; Extract lane 1 of the <2 x double> %b and release-store it to %a.
; With +rcpc3 the expected output is a single stl1 store of lane 1; without
; it, the lane is copied to d0, moved to x8 and stored with stlr.
define hidden void @test_stl1_2xdouble_lane1(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xdouble_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xdouble_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    mov d0, v0.d[1]
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %elt = extractelement <2 x double> %b, i64 1
  store atomic double %elt, ptr %a release, align 8
  ret void
}

; Extract the only element of the <1 x i64> %b and release-store it to %a.
; With +rcpc3 the expected output is an stl1 lane store; without it, the
; value is moved to x8 and stored with stlr.
define hidden void @test_stl1_1xi64_lane0(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_1xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_1xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %elt = extractelement <1 x i64> %b, i64 0
  store atomic i64 %elt, ptr %a release, align 8
  ret void
}

; Extract the only element of the <1 x double> %b and release-store it to %a.
; With +rcpc3 the expected output is an stl1 lane store; without it, the
; value is moved to x8 and stored with stlr.
define hidden void @test_stl1_1xdouble_lane0(ptr nocapture noundef writeonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_1xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_1xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %elt = extractelement <1 x double> %b, i64 0
  store atomic double %elt, ptr %a release, align 8
  ret void
}

; The remaining tests do not have any particular RCPC3-specific codegen:

; Load-acquire a plain (non-vector) double. Both RUN configurations are
; expected to produce the same ldapr + fmov sequence.
define hidden double @test_double_load(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_double_load:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldapr x8, [x0]
; BOTH-NEXT:    fmov d0, x8
; BOTH-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  ret double %loaded
}

; Store-release a plain (non-vector) double. Both RUN configurations are
; expected to produce the same fmov + stlr sequence.
define hidden void @test_double_store(ptr nocapture noundef writeonly %a, double noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_double_store:
; BOTH:       // %bb.0:
; BOTH-NEXT:    fmov x8, d0
; BOTH-NEXT:    stlr x8, [x0]
; BOTH-NEXT:    ret
  store atomic double %b, ptr %a release, align 8
  ret void
}

; Load-acquire an i64, then bitcast the scalar to a 64-bit <2 x i32> vector.
; Both RUN configurations are expected to produce ldapr + fmov.
define hidden <2 x i32> @test_load_i64_bitcast_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_i64_bitcast_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldapr x8, [x0]
; BOTH-NEXT:    fmov d0, x8
; BOTH-NEXT:    ret
  %bits = load atomic i64, ptr %a acquire, align 8
  %vec = bitcast i64 %bits to <2 x i32>
  ret <2 x i32> %vec
}

; Bitcast a 64-bit <2 x i32> vector to i64, then store-release the i64.
; Both RUN configurations are expected to produce fmov + stlr.
; %a is stored through, so it is marked writeonly like the other store tests
; in this file; writing through a readonly pointer argument is undefined
; behaviour.
define hidden void @test_bitcast_2xi32_store_i64(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_bitcast_2xi32_store_i64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    fmov x8, d0
; BOTH-NEXT:    stlr x8, [x0]
; BOTH-NEXT:    ret
  %1 = bitcast <2 x i32> %b to i64
  store atomic i64 %1, ptr %a release, align 8
  ret void
}

; Plain (non-atomic) load of a 64-bit <2 x i32> vector. Both RUN
; configurations are expected to use an ordinary ldr of d0.
define hidden <2 x i32> @test_load_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldr d0, [x0]
; BOTH-NEXT:    ret
  %vec = load <2 x i32>, ptr %a, align 8
  ret <2 x i32> %vec
}

; Plain (non-atomic) store of a 64-bit <2 x i32> vector. Both RUN
; configurations are expected to use an ordinary str of d0.
define hidden void @test_store_2xi32(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_store_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    str d0, [x0]
; BOTH-NEXT:    ret
  store <2 x i32> %b, ptr %a, align 8
  ret void
}

; Plain (non-atomic) load of a 64-bit vector with a single i64 element.
; Both RUN configurations are expected to use an ordinary ldr of d0.
define hidden <1 x i64> @test_load_1xi64(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_1xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldr d0, [x0]
; BOTH-NEXT:    ret
  %vec = load <1 x i64>, ptr %a, align 8
  ret <1 x i64> %vec
}

; Plain (non-atomic) store of a 64-bit vector with a single i64 element.
; Both RUN configurations are expected to use an ordinary str of d0.
define hidden void @test_store_1xi64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_store_1xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    str d0, [x0]
; BOTH-NEXT:    ret
  store <1 x i64> %b, ptr %a, align 8
  ret void
}

; Plain (non-atomic) i64 load inserted into lane 0 of the <2 x i64> %b.
; Both RUN configurations are expected to use the ordinary ld1 lane load.
define hidden <2 x i64> @test_load_insert_2xi64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_load_insert_2xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ld1 { v0.d }[0], [x0]
; BOTH-NEXT:    ret
  %loaded = load i64, ptr %a, align 8
  %vec = insertelement <2 x i64> %b, i64 %loaded, i64 0
  ret <2 x i64> %vec
}

; Extract lane 1 of the <2 x i64> %b and store it with a plain (non-atomic)
; store. Both RUN configurations are expected to use the ordinary st1 lane
; store.
define hidden void @test_extract_store_2xi64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_extract_store_2xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    st1 { v0.d }[1], [x0]
; BOTH-NEXT:    ret
  %elt = extractelement <2 x i64> %b, i64 1
  store i64 %elt, ptr %a, align 8
  ret void
}