; llvm/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -code-model=medium < %s \
; RUN:   | FileCheck -check-prefix=RV32I-MEDIUM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -code-model=medium < %s \
; RUN:   | FileCheck -check-prefix=RV64I-MEDIUM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -code-model=large < %s \
; RUN:   | FileCheck -check-prefix=RV64I-LARGE %s

; We can often fold an ADDI into the offset of load/store instructions:
;   (load (addi base, off1), off2) -> (load base, off1+off2)
;   (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
; This is possible when off1+off2 still fits in the 12-bit immediate.
; Check whether we do the fold under various conditions. If off1 is (the low
; part of) an address, the fold's safety depends on the variable's alignment.

; i64 globals that differ only in their declared alignment (@g_0 declares
; none). The load_g_* and store_g_* functions in this file access the global
; with the matching suffix.
@g_0 = dso_local global i64 0
@g_1 = dso_local global i64 0, align 1
@g_2 = dso_local global i64 0, align 2
@g_4 = dso_local global i64 0, align 4
@g_8 = dso_local global i64 0, align 8
@g_16 = dso_local global i64 0, align 16

; Load an i64 from @g_0, which carries no explicit alignment.
; The riscv32 small-model output is expected to address the second word as
; %lo(g_0+4) off the same LUI, i.e. without a separate ADDI. The riscv32
; medium-model output materializes the address with AUIPC+ADDI and uses plain
; 0 and 4 offsets.
define dso_local i64 @load_g_0() nounwind {
; RV32I-LABEL: load_g_0:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_0)
; RV32I-NEXT:    lw a0, %lo(g_0)(a1)
; RV32I-NEXT:    lw a1, %lo(g_0+4)(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_g_0:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi0:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_0)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi0)
; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_g_0:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_0)
; RV64I-NEXT:    ld a0, %lo(g_0)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_g_0:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi0:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_0)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_g_0:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi0:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI0_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV64I-LARGE-NEXT:    ld a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @g_0
  ret i64 %0
}

; Load an i64 from @g_1 (align 1).
; The riscv32 small-model output is expected to keep a separate ADDI of
; %lo(g_1) before the second word load at offset 4; the +4 is not merged into
; the %lo operand here.
define dso_local i64 @load_g_1() nounwind {
; RV32I-LABEL: load_g_1:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_1)
; RV32I-NEXT:    lw a0, %lo(g_1)(a1)
; RV32I-NEXT:    addi a1, a1, %lo(g_1)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_g_1:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi1:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_1)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi1)
; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_g_1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_1)
; RV64I-NEXT:    ld a0, %lo(g_1)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_g_1:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi1:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_1)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi1)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_g_1:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi1:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI1_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi1)(a0)
; RV64I-LARGE-NEXT:    ld a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @g_1
  ret i64 %0
}

; Load an i64 from @g_2 (align 2).
; As with the align-1 case, the riscv32 small-model output is expected to
; keep a separate ADDI of %lo(g_2) and load the second word at offset 4 from
; the result.
define dso_local i64 @load_g_2() nounwind {
; RV32I-LABEL: load_g_2:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_2)
; RV32I-NEXT:    lw a0, %lo(g_2)(a1)
; RV32I-NEXT:    addi a1, a1, %lo(g_2)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_g_2:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi2:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_2)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi2)
; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_g_2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_2)
; RV64I-NEXT:    ld a0, %lo(g_2)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_g_2:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi2:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_2)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi2)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_g_2:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi2:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI2_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi2)(a0)
; RV64I-LARGE-NEXT:    ld a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @g_2
  ret i64 %0
}

; Load an i64 from @g_4 (align 4).
; The riscv32 small-model output is still expected to use a separate ADDI of
; %lo(g_4) for the second word; 4-byte alignment does not yield a %lo(g_4+4)
; operand in these checks.
define dso_local i64 @load_g_4() nounwind {
; RV32I-LABEL: load_g_4:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_4)
; RV32I-NEXT:    lw a0, %lo(g_4)(a1)
; RV32I-NEXT:    addi a1, a1, %lo(g_4)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_g_4:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi3:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi3)
; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_g_4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_4)
; RV64I-NEXT:    ld a0, %lo(g_4)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_g_4:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi3:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi3)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_g_4:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi3:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI3_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi3)(a0)
; RV64I-LARGE-NEXT:    ld a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @g_4
  ret i64 %0
}

; Load an i64 from @g_8 (align 8).
; With 8-byte alignment the riscv32 small-model output is expected to address
; both words through %lo operands (g_8 and g_8+4) off a single LUI, with no
; intermediate ADDI.
define dso_local i64 @load_g_8() nounwind {
; RV32I-LABEL: load_g_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_8)
; RV32I-NEXT:    lw a0, %lo(g_8)(a1)
; RV32I-NEXT:    lw a1, %lo(g_8+4)(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_g_8:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi4:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_8)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi4)
; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_g_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_8)
; RV64I-NEXT:    ld a0, %lo(g_8)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_g_8:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi4:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_8)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi4)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_g_8:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi4:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI4_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi4)(a0)
; RV64I-LARGE-NEXT:    ld a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @g_8
  ret i64 %0
}

; Load an i64 from @g_16 (align 16).
; Same expectation as the align-8 case: the riscv32 small-model output uses
; %lo(g_16) and %lo(g_16+4) off a single LUI.
define dso_local i64 @load_g_16() nounwind {
; RV32I-LABEL: load_g_16:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_16)
; RV32I-NEXT:    lw a0, %lo(g_16)(a1)
; RV32I-NEXT:    lw a1, %lo(g_16+4)(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_g_16:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi5:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_16)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi5)
; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_g_16:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_16)
; RV64I-NEXT:    ld a0, %lo(g_16)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_g_16:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi5:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_16)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi5)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_g_16:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi5:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI5_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi5)(a0)
; RV64I-LARGE-NEXT:    ld a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @g_16
  ret i64 %0
}

; Store an i64 zero to @g_4 (align 4).
; The riscv32 small-model output is expected to store the first word through
; %lo(g_4) and then use a separate ADDI of %lo(g_4) for the word at offset 4,
; mirroring the load case for the same global.
define dso_local void @store_g_4() nounwind {
; RV32I-LABEL: store_g_4:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g_4)
; RV32I-NEXT:    sw zero, %lo(g_4)(a0)
; RV32I-NEXT:    addi a0, a0, %lo(g_4)
; RV32I-NEXT:    sw zero, 4(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: store_g_4:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi6:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4)
; RV32I-MEDIUM-NEXT:    addi a0, a0, %pcrel_lo(.Lpcrel_hi6)
; RV32I-MEDIUM-NEXT:    sw zero, 4(a0)
; RV32I-MEDIUM-NEXT:    sw zero, 0(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: store_g_4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_4)
; RV64I-NEXT:    sd zero, %lo(g_4)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: store_g_4:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi6:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4)
; RV64I-MEDIUM-NEXT:    sd zero, %pcrel_lo(.Lpcrel_hi6)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: store_g_4:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi6:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI6_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi6)(a0)
; RV64I-LARGE-NEXT:    sd zero, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
   store i64 0, ptr @g_4
   ret void
}

; Store an i64 zero to @g_8 (align 8).
; The riscv32 small-model output is expected to address both word stores
; through %lo operands (g_8+4 and g_8) off a single LUI, with no ADDI.
define dso_local void @store_g_8() nounwind {
; RV32I-LABEL: store_g_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g_8)
; RV32I-NEXT:    sw zero, %lo(g_8+4)(a0)
; RV32I-NEXT:    sw zero, %lo(g_8)(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: store_g_8:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi7:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_8)
; RV32I-MEDIUM-NEXT:    addi a0, a0, %pcrel_lo(.Lpcrel_hi7)
; RV32I-MEDIUM-NEXT:    sw zero, 4(a0)
; RV32I-MEDIUM-NEXT:    sw zero, 0(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: store_g_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_8)
; RV64I-NEXT:    sd zero, %lo(g_8)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: store_g_8:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi7:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_8)
; RV64I-MEDIUM-NEXT:    sd zero, %pcrel_lo(.Lpcrel_hi7)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: store_g_8:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi7:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI7_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi7)(a0)
; RV64I-LARGE-NEXT:    sd zero, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
   store i64 0, ptr @g_8
   ret void
}

; Check that we can fold an ADDI into the offset of a store instruction
; when the store instruction is the root node in the DAG.

; 4-byte-aligned i32 read and written by inc_g_i32. Unlike the i64 globals at
; the top of the file, this one is not marked dso_local.
@g_4_i32 = global i32 0, align 4

; Increment @g_4_i32 in place: load, add 1, store back, then branch to a
; separate return block so the store is not followed directly by the ret.
; In the small and medium code models the expected output uses the same
; %lo / %pcrel_lo operand on both the load and the store. In the large code
; model the address is first loaded via .LCPI8_0 and both accesses use
; offset 0.
define dso_local void @inc_g_i32() nounwind {
; RV32I-LABEL: inc_g_i32:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g_4_i32)
; RV32I-NEXT:    lw a1, %lo(g_4_i32)(a0)
; RV32I-NEXT:    addi a1, a1, 1
; RV32I-NEXT:    sw a1, %lo(g_4_i32)(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: inc_g_i32:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi8:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4_i32)
; RV32I-MEDIUM-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
; RV32I-MEDIUM-NEXT:    addi a1, a1, 1
; RV32I-MEDIUM-NEXT:    sw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: inc_g_i32:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_4_i32)
; RV64I-NEXT:    lw a1, %lo(g_4_i32)(a0)
; RV64I-NEXT:    addi a1, a1, 1
; RV64I-NEXT:    sw a1, %lo(g_4_i32)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: inc_g_i32:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi8:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g_4_i32)
; RV64I-MEDIUM-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
; RV64I-MEDIUM-NEXT:    addi a1, a1, 1
; RV64I-MEDIUM-NEXT:    sw a1, %pcrel_lo(.Lpcrel_hi8)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: inc_g_i32:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi8:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI8_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi8)(a0)
; RV64I-LARGE-NEXT:    lw a1, 0(a0)
; RV64I-LARGE-NEXT:    addi a1, a1, 1
; RV64I-LARGE-NEXT:    sw a1, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i32, ptr @g_4_i32
  %inc = add i32 %0, 1
  store i32 %inc, ptr @g_4_i32
  br label %if.end

if.end:
  ret void
}

; Check for folds in accesses to elements of an i32 array.

; Two-element, 4-byte-aligned i32 array; load_ga reads element 1.
@ga = dso_local local_unnamed_addr global [2 x i32] zeroinitializer, align 4

; Load element 1 of @ga (byte offset 4) as a single i32.
; In the small and medium code models the expected output carries the +4 in
; the symbol operand itself (ga+4) for both the high and low parts. In the
; large code model the base address is loaded via .LCPI9_0 and the +4 appears
; as the load's immediate offset.
define dso_local i32 @load_ga() local_unnamed_addr #0 {
; RV32I-LABEL: load_ga:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a0, %hi(ga+4)
; RV32I-NEXT:    lw a0, %lo(ga+4)(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_ga:
; RV32I-MEDIUM:       # %bb.0:
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi9:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(ga+4)
; RV32I-MEDIUM-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi9)(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_ga:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a0, %hi(ga+4)
; RV64I-NEXT:    lw a0, %lo(ga+4)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_ga:
; RV64I-MEDIUM:       # %bb.0:
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi9:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(ga+4)
; RV64I-MEDIUM-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi9)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_ga:
; RV64I-LARGE:       # %bb.0:
; RV64I-LARGE-NEXT:  .Lpcrel_hi9:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI9_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi9)(a0)
; RV64I-LARGE-NEXT:    lw a0, 4(a0)
; RV64I-LARGE-NEXT:    ret
  %1 = load i32, ptr getelementptr inbounds ([2 x i32], ptr @ga, i32 0, i32 1), align 4
  ret i32 %1
}

; Check for folds in accesses to the second element of an i64 array.

; Two-element i64 arrays that differ only in alignment (8 vs. 16 bytes);
; load_ga_8 and load_ga_16 each read element 1 of the matching array.
@ga_8 = dso_local local_unnamed_addr global [2 x i64] zeroinitializer, align 8
@ga_16 = dso_local local_unnamed_addr global [2 x i64] zeroinitializer, align 16

; Load element 1 of @ga_8 (byte offset 8, array aligned to 8).
; The riscv32 small-model output is expected to form the base with LUI+ADDI
; and load the two words at plain offsets 8 and 12. On riscv64 the small and
; medium models carry the +8 in the symbol operand (ga_8+8).
define dso_local i64 @load_ga_8() nounwind {
; RV32I-LABEL: load_ga_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(ga_8)
; RV32I-NEXT:    addi a1, a1, %lo(ga_8)
; RV32I-NEXT:    lw a0, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_ga_8:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi10:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(ga_8)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi10)
; RV32I-MEDIUM-NEXT:    lw a0, 8(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 12(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_ga_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(ga_8+8)
; RV64I-NEXT:    ld a0, %lo(ga_8+8)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_ga_8:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi10:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(ga_8+8)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi10)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_ga_8:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi10:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI10_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi10)(a0)
; RV64I-LARGE-NEXT:    ld a0, 8(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr getelementptr inbounds ([2 x i64], ptr @ga_8, i32 0, i32 1)
  ret i64 %0
}

; Load element 1 of @ga_16 (byte offset 8, array aligned to 16).
; The riscv32 small-model output is expected to address both words through
; %lo operands (ga_16+8 and ga_16+12) off a single LUI of %hi(ga_16). The
; riscv64 small-model output likewise pairs %hi(ga_16) with %lo(ga_16+8).
define dso_local i64 @load_ga_16() nounwind {
; RV32I-LABEL: load_ga_16:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(ga_16)
; RV32I-NEXT:    lw a0, %lo(ga_16+8)(a1)
; RV32I-NEXT:    lw a1, %lo(ga_16+12)(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_ga_16:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi11:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(ga_16)
; RV32I-MEDIUM-NEXT:    addi a1, a0, %pcrel_lo(.Lpcrel_hi11)
; RV32I-MEDIUM-NEXT:    lw a0, 8(a1)
; RV32I-MEDIUM-NEXT:    lw a1, 12(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_ga_16:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(ga_16)
; RV64I-NEXT:    ld a0, %lo(ga_16+8)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_ga_16:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi11:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(ga_16+8)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi11)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_ga_16:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Lpcrel_hi11:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.LCPI11_0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi11)(a0)
; RV64I-LARGE-NEXT:    ld a0, 8(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr getelementptr inbounds ([2 x i64], ptr @ga_16, i32 0, i32 1)
  ret i64 %0
}

; Check for folds in accesses to block addresses.
; Load a pointer from the address of this function's own %label block.
; The expected output references the block's .Ltmp0 label directly in the
; load's low-part operand for every configuration, including the large code
; model, which here uses AUIPC on .Ltmp0 rather than an .LCPI symbol.
define dso_local ptr @load_ba_1() nounwind {
; RV32I-LABEL: load_ba_1:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:  .Ltmp0: # Block address taken
; RV32I-NEXT:  # %bb.1: # %label
; RV32I-NEXT:    lui a0, %hi(.Ltmp0)
; RV32I-NEXT:    lw a0, %lo(.Ltmp0)(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_ba_1:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Ltmp0: # Block address taken
; RV32I-MEDIUM-NEXT:  # %bb.1: # %label
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi12:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(.Ltmp0)
; RV32I-MEDIUM-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi12)(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_ba_1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:  .Ltmp0: # Block address taken
; RV64I-NEXT:  # %bb.1: # %label
; RV64I-NEXT:    lui a0, %hi(.Ltmp0)
; RV64I-NEXT:    ld a0, %lo(.Ltmp0)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_ba_1:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Ltmp0: # Block address taken
; RV64I-MEDIUM-NEXT:  # %bb.1: # %label
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi12:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(.Ltmp0)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi12)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_ba_1:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Ltmp0: # Block address taken
; RV64I-LARGE-NEXT:  # %bb.1: # %label
; RV64I-LARGE-NEXT:  .Lpcrel_hi12:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.Ltmp0)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi12)(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  br label %label
label:
  %0 = load ptr, ptr blockaddress(@load_ba_1, %label)
  ret ptr %0
}

; Load a pointer from 8 bytes past the address of this function's own %label
; block. The expected output carries the +8 in the symbol operand (.Ltmp1+8)
; for both the high and low parts in every configuration.
define dso_local ptr @load_ba_2() nounwind {
; RV32I-LABEL: load_ba_2:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:  .Ltmp1: # Block address taken
; RV32I-NEXT:  # %bb.1: # %label
; RV32I-NEXT:    lui a0, %hi(.Ltmp1+8)
; RV32I-NEXT:    lw a0, %lo(.Ltmp1+8)(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_ba_2:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Ltmp1: # Block address taken
; RV32I-MEDIUM-NEXT:  # %bb.1: # %label
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi13:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(.Ltmp1+8)
; RV32I-MEDIUM-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi13)(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_ba_2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:  .Ltmp1: # Block address taken
; RV64I-NEXT:  # %bb.1: # %label
; RV64I-NEXT:    lui a0, %hi(.Ltmp1+8)
; RV64I-NEXT:    ld a0, %lo(.Ltmp1+8)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_ba_2:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Ltmp1: # Block address taken
; RV64I-MEDIUM-NEXT:  # %bb.1: # %label
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi13:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(.Ltmp1+8)
; RV64I-MEDIUM-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi13)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_ba_2:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:  .Ltmp1: # Block address taken
; RV64I-LARGE-NEXT:  # %bb.1: # %label
; RV64I-LARGE-NEXT:  .Lpcrel_hi13:
; RV64I-LARGE-NEXT:    auipc a0, %pcrel_hi(.Ltmp1+8)
; RV64I-LARGE-NEXT:    ld a0, %pcrel_lo(.Lpcrel_hi13)(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  br label %label
label:
  %0 = load ptr, ptr getelementptr inbounds (i8, ptr blockaddress(@load_ba_2, %label), i32 8)
  ret ptr %0
}

; Check for folds in accesses to thread-local variables.

; Thread-local i64 globals that differ only in alignment (4 vs. 8 bytes);
; load_tl_4 and load_tl_8 each load the matching variable.
@tl_4 = dso_local thread_local global i64 0, align 4
@tl_8 = dso_local thread_local global i64 0, align 8

; Load an i64 from the thread-local @tl_4 (align 4).
; Every configuration is expected to use a %tprel_hi / %tprel_add /
; %tprel_lo sequence relative to tp. On riscv32 the second word goes through
; a separate ADDI of %tprel_lo(tl_4) followed by a load at offset 4.
define dso_local i64 @load_tl_4() nounwind {
; RV32I-LABEL: load_tl_4:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %tprel_hi(tl_4)
; RV32I-NEXT:    add a1, a0, tp, %tprel_add(tl_4)
; RV32I-NEXT:    lw a0, %tprel_lo(tl_4)(a1)
; RV32I-NEXT:    addi a1, a1, %tprel_lo(tl_4)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_tl_4:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    lui a0, %tprel_hi(tl_4)
; RV32I-MEDIUM-NEXT:    add a1, a0, tp, %tprel_add(tl_4)
; RV32I-MEDIUM-NEXT:    lw a0, %tprel_lo(tl_4)(a1)
; RV32I-MEDIUM-NEXT:    addi a1, a1, %tprel_lo(tl_4)
; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_tl_4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %tprel_hi(tl_4)
; RV64I-NEXT:    add a0, a0, tp, %tprel_add(tl_4)
; RV64I-NEXT:    ld a0, %tprel_lo(tl_4)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_tl_4:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    lui a0, %tprel_hi(tl_4)
; RV64I-MEDIUM-NEXT:    add a0, a0, tp, %tprel_add(tl_4)
; RV64I-MEDIUM-NEXT:    ld a0, %tprel_lo(tl_4)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_tl_4:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    lui a0, %tprel_hi(tl_4)
; RV64I-LARGE-NEXT:    add a0, a0, tp, %tprel_add(tl_4)
; RV64I-LARGE-NEXT:    ld a0, %tprel_lo(tl_4)(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @tl_4
  ret i64 %0
}

; Load an i64 from the thread-local @tl_8 (align 8).
; On riscv32 both words are expected to be addressed through %tprel_lo
; operands (tl_8 and tl_8+4) off the same tp-relative base, with no ADDI.
define dso_local i64 @load_tl_8() nounwind {
; RV32I-LABEL: load_tl_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %tprel_hi(tl_8)
; RV32I-NEXT:    add a1, a0, tp, %tprel_add(tl_8)
; RV32I-NEXT:    lw a0, %tprel_lo(tl_8)(a1)
; RV32I-NEXT:    lw a1, %tprel_lo(tl_8+4)(a1)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_tl_8:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    lui a0, %tprel_hi(tl_8)
; RV32I-MEDIUM-NEXT:    add a1, a0, tp, %tprel_add(tl_8)
; RV32I-MEDIUM-NEXT:    lw a0, %tprel_lo(tl_8)(a1)
; RV32I-MEDIUM-NEXT:    lw a1, %tprel_lo(tl_8+4)(a1)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_tl_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %tprel_hi(tl_8)
; RV64I-NEXT:    add a0, a0, tp, %tprel_add(tl_8)
; RV64I-NEXT:    ld a0, %tprel_lo(tl_8)(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_tl_8:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    lui a0, %tprel_hi(tl_8)
; RV64I-MEDIUM-NEXT:    add a0, a0, tp, %tprel_add(tl_8)
; RV64I-MEDIUM-NEXT:    ld a0, %tprel_lo(tl_8)(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_tl_8:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    lui a0, %tprel_hi(tl_8)
; RV64I-LARGE-NEXT:    add a0, a0, tp, %tprel_add(tl_8)
; RV64I-LARGE-NEXT:    ld a0, %tprel_lo(tl_8)(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr @tl_8
  ret i64 %0
}

; Load an i64 from the constant address 2040.
; On riscv32 the two word loads are expected at 2040(zero) and 2044(zero),
; so both offsets are used directly as immediates off the zero register.
define dso_local i64 @load_const_ok() nounwind {
; RV32I-LABEL: load_const_ok:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lw a0, 2040(zero)
; RV32I-NEXT:    lw a1, 2044(zero)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_const_ok:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    lw a0, 2040(zero)
; RV32I-MEDIUM-NEXT:    lw a1, 2044(zero)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_const_ok:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    ld a0, 2040(zero)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_const_ok:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    ld a0, 2040(zero)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_const_ok:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    ld a0, 2040(zero)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr inttoptr (i32 2040 to ptr)
  ret i64 %0
}

; Load an i64 from the constant address 2044.
; On riscv32 the first word is expected at 2044(zero), but the second word
; (address 2048) is reached as -2048 off a register holding 4096 (lui 1)
; instead of as an immediate off the zero register.
; NOTE(review): the name looks like a typo for "load_const_overflow"; it is
; left unchanged because every check label in this function uses the current
; spelling.
define dso_local i64 @load_cost_overflow() nounwind {
; RV32I-LABEL: load_cost_overflow:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    lw a1, -2048(a0)
; RV32I-NEXT:    lw a0, 2044(zero)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_cost_overflow:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    lui a0, 1
; RV32I-MEDIUM-NEXT:    lw a1, -2048(a0)
; RV32I-MEDIUM-NEXT:    lw a0, 2044(zero)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_cost_overflow:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    ld a0, 2044(zero)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_cost_overflow:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    ld a0, 2044(zero)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_cost_overflow:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    ld a0, 2044(zero)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i64, ptr inttoptr (i64 2044 to ptr)
  ret i64 %0
}

; Load an i32 from the constant address 4080.
; Every configuration is expected to materialize 4096 with a single LUI and
; reach 4080 through a -16 load offset, with no separate ADDI.
define dso_local i32 @load_const_medium() nounwind {
; RV32I-LABEL: load_const_medium:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    lw a0, -16(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_const_medium:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    lui a0, 1
; RV32I-MEDIUM-NEXT:    lw a0, -16(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_const_medium:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    lw a0, -16(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_const_medium:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    lui a0, 1
; RV64I-MEDIUM-NEXT:    lw a0, -16(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_const_medium:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    lui a0, 1
; RV64I-LARGE-NEXT:    lw a0, -16(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i32, ptr inttoptr (i64 4080 to ptr)
  ret i32 %0
}

; The constant here is 0x7ffff800. This value requires LUI+ADDIW on RV64;
; LUI+ADDI would produce a different constant, so we can't fold into the load
; offset.
; Load an i32 from the constant address 2147481600.
; On riscv32 the expected output is LUI 524288 with a -2048 load offset. On
; riscv64 the expected output keeps a separate ADDIW and loads at offset 0.
define dso_local i32 @load_const_large() nounwind {
; RV32I-LABEL: load_const_large:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, 524288
; RV32I-NEXT:    lw a0, -2048(a0)
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: load_const_large:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    lui a0, 524288
; RV32I-MEDIUM-NEXT:    lw a0, -2048(a0)
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: load_const_large:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, 524288
; RV64I-NEXT:    addiw a0, a0, -2048
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: load_const_large:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    lui a0, 524288
; RV64I-MEDIUM-NEXT:    addiw a0, a0, -2048
; RV64I-MEDIUM-NEXT:    lw a0, 0(a0)
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: load_const_large:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    lui a0, 524288
; RV64I-LARGE-NEXT:    addiw a0, a0, -2048
; RV64I-LARGE-NEXT:    lw a0, 0(a0)
; RV64I-LARGE-NEXT:    ret
entry:
  %0 = load i32, ptr inttoptr (i64 2147481600 to ptr)
  ret i32 %0
}

%struct.S = type { i64, i64 }

define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV32I-LABEL: fold_addi_from_different_bb:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a4
; RV32I-NEXT:    mv s1, a3
; RV32I-NEXT:    mv s2, a2
; RV32I-NEXT:    beqz a3, .LBB20_3
; RV32I-NEXT:  # %bb.1: # %entry
; RV32I-NEXT:    slti a1, s1, 0
; RV32I-NEXT:    beqz a1, .LBB20_4
; RV32I-NEXT:  .LBB20_2:
; RV32I-NEXT:    li s3, 0
; RV32I-NEXT:    li s4, 0
; RV32I-NEXT:    j .LBB20_6
; RV32I-NEXT:  .LBB20_3:
; RV32I-NEXT:    seqz a1, s2
; RV32I-NEXT:    bnez a1, .LBB20_2
; RV32I-NEXT:  .LBB20_4: # %for.body.lr.ph
; RV32I-NEXT:    li s5, 0
; RV32I-NEXT:    li s6, 0
; RV32I-NEXT:    li s3, 0
; RV32I-NEXT:    li s4, 0
; RV32I-NEXT:    slli a0, a0, 4
; RV32I-NEXT:    add s7, s0, a0
; RV32I-NEXT:  .LBB20_5: # %for.body
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call f
; RV32I-NEXT:    lw a0, 12(s7)
; RV32I-NEXT:    lw a1, 8(s7)
; RV32I-NEXT:    add a0, a0, s4
; RV32I-NEXT:    add s3, a1, s3
; RV32I-NEXT:    sltu s4, s3, a1
; RV32I-NEXT:    addi s5, s5, 1
; RV32I-NEXT:    seqz a1, s5
; RV32I-NEXT:    add s6, s6, a1
; RV32I-NEXT:    xor a1, s5, s2
; RV32I-NEXT:    xor a2, s6, s1
; RV32I-NEXT:    or a1, a1, a2
; RV32I-NEXT:    add s4, a0, s4
; RV32I-NEXT:    bnez a1, .LBB20_5
; RV32I-NEXT:  .LBB20_6: # %for.cond.cleanup
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    mv a1, s4
; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: fold_addi_from_different_bb:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:    addi sp, sp, -48
; RV32I-MEDIUM-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-MEDIUM-NEXT:    mv s0, a4
; RV32I-MEDIUM-NEXT:    mv s1, a3
; RV32I-MEDIUM-NEXT:    mv s2, a2
; RV32I-MEDIUM-NEXT:    beqz a3, .LBB20_3
; RV32I-MEDIUM-NEXT:  # %bb.1: # %entry
; RV32I-MEDIUM-NEXT:    slti a1, s1, 0
; RV32I-MEDIUM-NEXT:    beqz a1, .LBB20_4
; RV32I-MEDIUM-NEXT:  .LBB20_2:
; RV32I-MEDIUM-NEXT:    li s3, 0
; RV32I-MEDIUM-NEXT:    li s4, 0
; RV32I-MEDIUM-NEXT:    j .LBB20_6
; RV32I-MEDIUM-NEXT:  .LBB20_3:
; RV32I-MEDIUM-NEXT:    seqz a1, s2
; RV32I-MEDIUM-NEXT:    bnez a1, .LBB20_2
; RV32I-MEDIUM-NEXT:  .LBB20_4: # %for.body.lr.ph
; RV32I-MEDIUM-NEXT:    li s5, 0
; RV32I-MEDIUM-NEXT:    li s6, 0
; RV32I-MEDIUM-NEXT:    li s3, 0
; RV32I-MEDIUM-NEXT:    li s4, 0
; RV32I-MEDIUM-NEXT:    slli a0, a0, 4
; RV32I-MEDIUM-NEXT:    add s7, s0, a0
; RV32I-MEDIUM-NEXT:  .LBB20_5: # %for.body
; RV32I-MEDIUM-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-MEDIUM-NEXT:    mv a0, s0
; RV32I-MEDIUM-NEXT:    call f
; RV32I-MEDIUM-NEXT:    lw a0, 12(s7)
; RV32I-MEDIUM-NEXT:    lw a1, 8(s7)
; RV32I-MEDIUM-NEXT:    add a0, a0, s4
; RV32I-MEDIUM-NEXT:    add s3, a1, s3
; RV32I-MEDIUM-NEXT:    sltu s4, s3, a1
; RV32I-MEDIUM-NEXT:    addi s5, s5, 1
; RV32I-MEDIUM-NEXT:    seqz a1, s5
; RV32I-MEDIUM-NEXT:    add s6, s6, a1
; RV32I-MEDIUM-NEXT:    xor a1, s5, s2
; RV32I-MEDIUM-NEXT:    xor a2, s6, s1
; RV32I-MEDIUM-NEXT:    or a1, a1, a2
; RV32I-MEDIUM-NEXT:    add s4, a0, s4
; RV32I-MEDIUM-NEXT:    bnez a1, .LBB20_5
; RV32I-MEDIUM-NEXT:  .LBB20_6: # %for.cond.cleanup
; RV32I-MEDIUM-NEXT:    mv a0, s3
; RV32I-MEDIUM-NEXT:    mv a1, s4
; RV32I-MEDIUM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-MEDIUM-NEXT:    addi sp, sp, 48
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: fold_addi_from_different_bb:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    blez a1, .LBB20_3
; RV64I-NEXT:  # %bb.1: # %for.body.lr.ph
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li s2, 0
; RV64I-NEXT:    slli a0, a0, 4
; RV64I-NEXT:    add s3, a2, a0
; RV64I-NEXT:  .LBB20_2: # %for.body
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call f
; RV64I-NEXT:    ld a0, 8(s3)
; RV64I-NEXT:    addi s1, s1, -1
; RV64I-NEXT:    add s2, a0, s2
; RV64I-NEXT:    bnez s1, .LBB20_2
; RV64I-NEXT:    j .LBB20_4
; RV64I-NEXT:  .LBB20_3:
; RV64I-NEXT:    li s2, 0
; RV64I-NEXT:  .LBB20_4: # %for.cond.cleanup
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: fold_addi_from_different_bb:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:    addi sp, sp, -48
; RV64I-MEDIUM-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT:    blez a1, .LBB20_3
; RV64I-MEDIUM-NEXT:  # %bb.1: # %for.body.lr.ph
; RV64I-MEDIUM-NEXT:    mv s0, a2
; RV64I-MEDIUM-NEXT:    mv s1, a1
; RV64I-MEDIUM-NEXT:    li s2, 0
; RV64I-MEDIUM-NEXT:    slli a0, a0, 4
; RV64I-MEDIUM-NEXT:    add s3, a2, a0
; RV64I-MEDIUM-NEXT:  .LBB20_2: # %for.body
; RV64I-MEDIUM-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-MEDIUM-NEXT:    mv a0, s0
; RV64I-MEDIUM-NEXT:    call f
; RV64I-MEDIUM-NEXT:    ld a0, 8(s3)
; RV64I-MEDIUM-NEXT:    addi s1, s1, -1
; RV64I-MEDIUM-NEXT:    add s2, a0, s2
; RV64I-MEDIUM-NEXT:    bnez s1, .LBB20_2
; RV64I-MEDIUM-NEXT:    j .LBB20_4
; RV64I-MEDIUM-NEXT:  .LBB20_3:
; RV64I-MEDIUM-NEXT:    li s2, 0
; RV64I-MEDIUM-NEXT:  .LBB20_4: # %for.cond.cleanup
; RV64I-MEDIUM-NEXT:    mv a0, s2
; RV64I-MEDIUM-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-MEDIUM-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-MEDIUM-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-MEDIUM-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-MEDIUM-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-MEDIUM-NEXT:    addi sp, sp, 48
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: fold_addi_from_different_bb:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    addi sp, sp, -48
; RV64I-LARGE-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-LARGE-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-LARGE-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-LARGE-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-LARGE-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-LARGE-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-LARGE-NEXT:    blez a1, .LBB20_3
; RV64I-LARGE-NEXT:  # %bb.1: # %for.body.lr.ph
; RV64I-LARGE-NEXT:    mv s0, a2
; RV64I-LARGE-NEXT:    mv s1, a1
; RV64I-LARGE-NEXT:  .Lpcrel_hi14:
; RV64I-LARGE-NEXT:    auipc a1, %pcrel_hi(.LCPI20_0)
; RV64I-LARGE-NEXT:    ld s3, %pcrel_lo(.Lpcrel_hi14)(a1)
; RV64I-LARGE-NEXT:    li s2, 0
; RV64I-LARGE-NEXT:    slli a0, a0, 4
; RV64I-LARGE-NEXT:    add s4, a2, a0
; RV64I-LARGE-NEXT:  .LBB20_2: # %for.body
; RV64I-LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-LARGE-NEXT:    mv a0, s0
; RV64I-LARGE-NEXT:    jalr s3
; RV64I-LARGE-NEXT:    ld a0, 8(s4)
; RV64I-LARGE-NEXT:    addi s1, s1, -1
; RV64I-LARGE-NEXT:    add s2, a0, s2
; RV64I-LARGE-NEXT:    bnez s1, .LBB20_2
; RV64I-LARGE-NEXT:    j .LBB20_4
; RV64I-LARGE-NEXT:  .LBB20_3:
; RV64I-LARGE-NEXT:    li s2, 0
; RV64I-LARGE-NEXT:  .LBB20_4: # %for.cond.cleanup
; RV64I-LARGE-NEXT:    mv a0, s2
; RV64I-LARGE-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-LARGE-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-LARGE-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-LARGE-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-LARGE-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-LARGE-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-LARGE-NEXT:    addi sp, sp, 48
; RV64I-LARGE-NEXT:    ret
entry:
  %cmp4 = icmp sgt i64 %n, 0
  br i1 %cmp4, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; TODO: when this GEP is expanded, the resulting `addi` should be folded
  ; into the load in the loop body.
  %y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 1
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
  ret i64 %s.0.lcssa

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %s.05 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  call void @f(ptr %a)
  %0 = load i64, ptr %y, align 8
  %add = add nsw i64 %0, %s.05
  %inc = add nuw nsw i64 %i.06, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

declare void @f(ptr)

@g = external dso_local global [100 x [100 x i8]]

; This test used to crash due to calling getVRegDef on X0.
; The function loads the byte at @g[4][1] (byte offset 4*100 + 1 = 401 from
; @g), turns "byte == 0" into an i32 0/1 value and stores it through a null
; pointer. In the expected assembly that store uses the `zero` register as its
; base address.
; NOTE(review): presumably that zero-register base is the X0 operand that
; reached getVRegDef -- confirm against the original fix.
define i32 @crash() {
; RV32I-LABEL: crash:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g+401)
; RV32I-NEXT:    lbu a0, %lo(g+401)(a0)
; RV32I-NEXT:    seqz a0, a0
; RV32I-NEXT:    sw a0, 0(zero)
; RV32I-NEXT:    li a0, 0
; RV32I-NEXT:    ret
;
; RV32I-MEDIUM-LABEL: crash:
; RV32I-MEDIUM:       # %bb.0: # %entry
; RV32I-MEDIUM-NEXT:  .Lpcrel_hi14:
; RV32I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g+401)
; RV32I-MEDIUM-NEXT:    lbu a0, %pcrel_lo(.Lpcrel_hi14)(a0)
; RV32I-MEDIUM-NEXT:    seqz a0, a0
; RV32I-MEDIUM-NEXT:    sw a0, 0(zero)
; RV32I-MEDIUM-NEXT:    li a0, 0
; RV32I-MEDIUM-NEXT:    ret
;
; RV64I-LABEL: crash:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g+401)
; RV64I-NEXT:    lbu a0, %lo(g+401)(a0)
; RV64I-NEXT:    seqz a0, a0
; RV64I-NEXT:    sw a0, 0(zero)
; RV64I-NEXT:    li a0, 0
; RV64I-NEXT:    ret
;
; RV64I-MEDIUM-LABEL: crash:
; RV64I-MEDIUM:       # %bb.0: # %entry
; RV64I-MEDIUM-NEXT:  .Lpcrel_hi14:
; RV64I-MEDIUM-NEXT:    auipc a0, %pcrel_hi(g+401)
; RV64I-MEDIUM-NEXT:    lbu a0, %pcrel_lo(.Lpcrel_hi14)(a0)
; RV64I-MEDIUM-NEXT:    seqz a0, a0
; RV64I-MEDIUM-NEXT:    sw a0, 0(zero)
; RV64I-MEDIUM-NEXT:    li a0, 0
; RV64I-MEDIUM-NEXT:    ret
;
; RV64I-LARGE-LABEL: crash:
; RV64I-LARGE:       # %bb.0: # %entry
; RV64I-LARGE-NEXT:    li a0, 1
; RV64I-LARGE-NEXT:  .Lpcrel_hi15:
; RV64I-LARGE-NEXT:    auipc a1, %pcrel_hi(.LCPI21_0)
; RV64I-LARGE-NEXT:    ld a1, %pcrel_lo(.Lpcrel_hi15)(a1)
; RV64I-LARGE-NEXT:    add a0, a1, a0
; RV64I-LARGE-NEXT:    lbu a0, 400(a0)
; RV64I-LARGE-NEXT:    seqz a0, a0
; RV64I-LARGE-NEXT:    sw a0, 0(zero)
; RV64I-LARGE-NEXT:    li a0, 0
; RV64I-LARGE-NEXT:    ret
entry:
  ; Inner-array index, written as a sign-extension of the constant 1 rather
  ; than as a plain i64 literal.
  ; NOTE(review): presumably this is the shape of the reduced reproducer and
  ; should not be simplified -- confirm before touching.
  %idxprom7.peel = sext i32 1 to i64
  br label %for.inc.peel

for.inc.peel:                                     ; preds = %entry
  ; Address of @g[4][%idxprom7.peel]: 4 rows of 100 bytes plus the index 1,
  ; i.e. byte offset 401 from @g.
  %arrayidx8.3.peel = getelementptr [100 x [100 x i8]], ptr @g, i64 0, i64 4, i64 %idxprom7.peel
  %0 = load i8, ptr %arrayidx8.3.peel, align 1
  ; 1 when the loaded byte is zero, 0 otherwise.
  %tobool.not.3.peel = icmp eq i8 %0, 0
  %spec.select = select i1 %tobool.not.3.peel, i32 1, i32 0
  ; Store through a null pointer; every expected output above emits this with
  ; `zero` as the base register.
  store i32 %spec.select, ptr null, align 4
  ret i32 0
}