llvm/clang/test/OpenMP/parallel_master_codegen.cpp

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

#ifdef CK1
///==========================================================================///
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1

// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"


void foo() { extern void mayThrow(); mayThrow(); }

void parallel_master() {
#pragma omp parallel master
  foo();
}



#endif

#ifdef CK2
///==========================================================================///
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK5

// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"


void parallel_master_private() {
  int a;
#pragma omp parallel master private(a)
  a++;
}



#endif

#ifdef CK3
///==========================================================================///
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK9
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9

// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"


void parallel_master_private() {
  int a;
#pragma omp parallel master default(shared)
  a++;
}



#endif

#ifdef CK31
///==========================================================================///
// RUN: %clang_cc1 -DCK31 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK13
// RUN: %clang_cc1 -DCK31 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK31 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK13

// RUN: %clang_cc1 -DCK31 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK31 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK31 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"


void parallel_master_default_firstprivate() {
  int a;
#pragma omp parallel master default(firstprivate)
  a++;
}





#endif

#ifdef CK32
///==========================================================================///
// RUN: %clang_cc1 -DCK32 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK17
// RUN: %clang_cc1 -DCK32 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK32 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK17

// RUN: %clang_cc1 -DCK32 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK32 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK32 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

struct St {
  int a, b;
  static int y;
  St() : a(0), b(0) {}
  ~St() {}
};
int St::y = 0;

void parallel_master_default_firstprivate() {
  St a = St();
  static int y = 0;
#pragma omp parallel master default(firstprivate)
  {
    a.a += 1;
    a.b += 1;
    y++;
    a.y++;
  }
}








#endif

#ifdef CK4
///==========================================================================///
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK21
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK21

// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"


void parallel_master_firstprivate() {
  int a;
#pragma omp parallel master firstprivate(a)
  a++;
}



#endif

#ifdef CK5
///==========================================================================///
// RUN: %clang_cc1 -DCK5 -verify -fopenmp -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK25
// RUN: %clang_cc1 -DCK5 -fopenmp -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK5 -fopenmp -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK25

// RUN: %clang_cc1 -DCK5 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK5 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK5 -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK29
// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s

// RUN: %clang_cc1 -DCK5 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s


int a;
#pragma omp threadprivate(a)

void parallel_master_copyin() {
#pragma omp parallel master copyin(a)
  a++;
}






// TLC-CHECK-DAG:   [[INC:%.+]] = add nsw i32 [[TEN]], 1
// TLC-CHECK-DAG:   store i32 [[INC]], ptr [[TEN]]

#endif
#ifdef CK6
///==========================================================================///
// RUN: %clang_cc1 -DCK6 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s

// RUN: %clang_cc1 -DCK6 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s


void parallel_master_reduction() {
  int g;
#pragma omp parallel master reduction(+:g)
  g = 1;
}





// switch

// case 1:

// case 2:

#endif
#ifdef CK7
///==========================================================================///
// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s

// RUN: %clang_cc1 -DCK7 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s


void parallel_master_if() {
#pragma omp parallel master if (parallel: false)
  parallel_master_if();
}



#endif
#ifdef CK8
///==========================================================================///
// RUN: %clang_cc1 -DCK8 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s

// RUN: %clang_cc1 -DCK8 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s

typedef __INTPTR_TYPE__ intptr_t;


void foo();

struct S {
  intptr_t a, b, c;
  S(intptr_t a) : a(a) {}
  operator char() { return a; }
  ~S() {}
};

template <typename T>
T tmain() {
#pragma omp parallel master proc_bind(master)
  foo();
  return T();
}

int main() {
#pragma omp parallel master proc_bind(spread)
  foo();
#pragma omp parallel master proc_bind(close)
  foo();
  return tmain<int>();
}




#endif
#ifdef CK9
///==========================================================================///
// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s

// RUN: %clang_cc1 -DCK9 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
typedef void **omp_allocator_handle_t;
extern const omp_allocator_handle_t omp_null_allocator;
extern const omp_allocator_handle_t omp_default_mem_alloc;
extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
extern const omp_allocator_handle_t omp_const_mem_alloc;
extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
extern const omp_allocator_handle_t omp_pteam_mem_alloc;
extern const omp_allocator_handle_t omp_thread_mem_alloc;

void parallel_master_allocate() {
  int a;
  omp_allocator_handle_t myalloc = nullptr;
#pragma omp parallel master firstprivate(a) allocate(myalloc:a)
  a++;
}


#endif
#endif
// CHECK1-LABEL: define {{[^@]+}}@_Z3foov
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    call void @_Z8mayThrowv()
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_masterv
// CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @_Z15parallel_masterv.omp_outlined)
// CHECK1-NEXT:    ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_masterv.omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 {
// CHECK1-NEXT:  entry:
// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK1:       omp_if.then:
// CHECK1-NEXT:    invoke void @_Z3foov()
// CHECK1-NEXT:            to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
// CHECK1:       invoke.cont:
// CHECK1-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK1-NEXT:    br label [[OMP_IF_END]]
// CHECK1:       omp_if.end:
// CHECK1-NEXT:    ret void
// CHECK1:       terminate.lpad:
// CHECK1-NEXT:    [[TMP4:%.*]] = landingpad { ptr, i32 }
// CHECK1-NEXT:            catch ptr null
// CHECK1-NEXT:    [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0
// CHECK1-NEXT:    call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT:    unreachable
//
//
// CHECK1-LABEL: define {{[^@]+}}@__clang_call_terminate
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] comdat {
// CHECK1-NEXT:    [[TMP2:%.*]] = call ptr @__cxa_begin_catch(ptr [[TMP0]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT:    call void @_ZSt9terminatev() #[[ATTR6]]
// CHECK1-NEXT:    unreachable
//
//
// CHECK5-LABEL: define {{[^@]+}}@_Z23parallel_master_privatev
// CHECK5-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK5-NEXT:  entry:
// CHECK5-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @_Z23parallel_master_privatev.omp_outlined)
// CHECK5-NEXT:    ret void
//
//
// CHECK5-LABEL: define {{[^@]+}}@_Z23parallel_master_privatev.omp_outlined
// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK5-NEXT:  entry:
// CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK5-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK5-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK5-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK5-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK5-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK5-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK5-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK5-NEXT:    br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK5:       omp_if.then:
// CHECK5-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A]], align 4
// CHECK5-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK5-NEXT:    store i32 [[INC]], ptr [[A]], align 4
// CHECK5-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK5-NEXT:    br label [[OMP_IF_END]]
// CHECK5:       omp_if.end:
// CHECK5-NEXT:    ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@_Z23parallel_master_privatev
// CHECK9-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z23parallel_master_privatev.omp_outlined, ptr [[A]])
// CHECK9-NEXT:    ret void
//
//
// CHECK9-LABEL: define {{[^@]+}}@_Z23parallel_master_privatev.omp_outlined
// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK9-NEXT:  entry:
// CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK9-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK9-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK9-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK9-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK9-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// CHECK9-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK9-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK9-NEXT:    br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK9:       omp_if.then:
// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK9-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK9-NEXT:    store i32 [[INC]], ptr [[TMP0]], align 4
// CHECK9-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK9-NEXT:    br label [[OMP_IF_END]]
// CHECK9:       omp_if.end:
// CHECK9-NEXT:    ret void
//
//
// CHECK13-LABEL: define {{[^@]+}}@_Z36parallel_master_default_firstprivatev
// CHECK13-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK13-NEXT:  entry:
// CHECK13-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK13-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK13-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK13-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK13-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK13-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z36parallel_master_default_firstprivatev.omp_outlined, i64 [[TMP1]])
// CHECK13-NEXT:    ret void
//
//
// CHECK13-LABEL: define {{[^@]+}}@_Z36parallel_master_default_firstprivatev.omp_outlined
// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK13-NEXT:  entry:
// CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK13-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK13-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK13-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK13-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK13-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK13-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK13-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK13-NEXT:    br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK13:       omp_if.then:
// CHECK13-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK13-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK13-NEXT:    store i32 [[INC]], ptr [[A_ADDR]], align 4
// CHECK13-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK13-NEXT:    br label [[OMP_IF_END]]
// CHECK13:       omp_if.end:
// CHECK13-NEXT:    ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@_Z36parallel_master_default_firstprivatev
// CHECK17-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK17-NEXT:  entry:
// CHECK17-NEXT:    [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4
// CHECK17-NEXT:    [[Y_CASTED:%.*]] = alloca i64, align 8
// CHECK17-NEXT:    [[Y_CASTED1:%.*]] = alloca i64, align 8
// CHECK17-NEXT:    call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]])
// CHECK17-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZZ36parallel_master_default_firstprivatevE1y, align 4
// CHECK17-NEXT:    store i32 [[TMP0]], ptr [[Y_CASTED]], align 4
// CHECK17-NEXT:    [[TMP1:%.*]] = load i64, ptr [[Y_CASTED]], align 8
// CHECK17-NEXT:    [[TMP2:%.*]] = load i32, ptr @_ZN2St1yE, align 4
// CHECK17-NEXT:    store i32 [[TMP2]], ptr [[Y_CASTED1]], align 4
// CHECK17-NEXT:    [[TMP3:%.*]] = load i64, ptr [[Y_CASTED1]], align 8
// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @_Z36parallel_master_default_firstprivatev.omp_outlined, ptr [[A]], i64 [[TMP1]], i64 [[TMP3]])
// CHECK17-NEXT:    call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR3:[0-9]+]]
// CHECK17-NEXT:    ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@_ZN2StC1Ev
// CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
// CHECK17-NEXT:  entry:
// CHECK17-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    call void @_ZN2StC2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]])
// CHECK17-NEXT:    ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@_Z36parallel_master_default_firstprivatev.omp_outlined
// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]], i64 noundef [[Y1:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK17-NEXT:  entry:
// CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    [[Y_ADDR:%.*]] = alloca i64, align 8
// CHECK17-NEXT:    [[Y_ADDR2:%.*]] = alloca i64, align 8
// CHECK17-NEXT:    [[A3:%.*]] = alloca [[STRUCT_ST:%.*]], align 4
// CHECK17-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK17-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK17-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK17-NEXT:    store i64 [[Y]], ptr [[Y_ADDR]], align 8
// CHECK17-NEXT:    store i64 [[Y1]], ptr [[Y_ADDR2]], align 8
// CHECK17-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK17-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A3]], ptr align 4 [[TMP0]], i64 8, i1 false)
// CHECK17-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK17-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// CHECK17-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK17-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
// CHECK17-NEXT:    br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK17:       omp_if.then:
// CHECK17-NEXT:    [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[A3]], i32 0, i32 0
// CHECK17-NEXT:    [[TMP5:%.*]] = load i32, ptr [[A4]], align 4
// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK17-NEXT:    store i32 [[ADD]], ptr [[A4]], align 4
// CHECK17-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[A3]], i32 0, i32 1
// CHECK17-NEXT:    [[TMP6:%.*]] = load i32, ptr [[B]], align 4
// CHECK17-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK17-NEXT:    store i32 [[ADD5]], ptr [[B]], align 4
// CHECK17-NEXT:    [[TMP7:%.*]] = load i32, ptr [[Y_ADDR]], align 4
// CHECK17-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK17-NEXT:    store i32 [[INC]], ptr [[Y_ADDR]], align 4
// CHECK17-NEXT:    [[TMP8:%.*]] = load i32, ptr @_ZN2St1yE, align 4
// CHECK17-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK17-NEXT:    store i32 [[INC6]], ptr @_ZN2St1yE, align 4
// CHECK17-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]])
// CHECK17-NEXT:    br label [[OMP_IF_END]]
// CHECK17:       omp_if.end:
// CHECK17-NEXT:    call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A3]]) #[[ATTR3]]
// CHECK17-NEXT:    ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@_ZN2StD1Ev
// CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 {
// CHECK17-NEXT:  entry:
// CHECK17-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    call void @_ZN2StD2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]]
// CHECK17-NEXT:    ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@_ZN2StC2Ev
// CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 {
// CHECK17-NEXT:  entry:
// CHECK17-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK17-NEXT:    store i32 0, ptr [[A]], align 4
// CHECK17-NEXT:    [[B:%.*]] = getelementptr inbounds nuw [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
// CHECK17-NEXT:    store i32 0, ptr [[B]], align 4
// CHECK17-NEXT:    ret void
//
//
// CHECK17-LABEL: define {{[^@]+}}@_ZN2StD2Ev
// CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 {
// CHECK17-NEXT:  entry:
// CHECK17-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK17-NEXT:    ret void
//
//
// CHECK21-LABEL: define {{[^@]+}}@_Z28parallel_master_firstprivatev
// CHECK21-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK21-NEXT:  entry:
// CHECK21-NEXT:    [[A:%.*]] = alloca i32, align 4
// CHECK21-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK21-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
// CHECK21-NEXT:    store i32 [[TMP0]], ptr [[A_CASTED]], align 4
// CHECK21-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK21-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @_Z28parallel_master_firstprivatev.omp_outlined, i64 [[TMP1]])
// CHECK21-NEXT:    ret void
//
//
// CHECK21-LABEL: define {{[^@]+}}@_Z28parallel_master_firstprivatev.omp_outlined
// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK21-NEXT:  entry:
// CHECK21-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK21-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK21-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK21-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK21-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK21-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK21-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK21-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK21-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK21-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK21-NEXT:    br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK21:       omp_if.then:
// CHECK21-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK21-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK21-NEXT:    store i32 [[INC]], ptr [[A_ADDR]], align 4
// CHECK21-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK21-NEXT:    br label [[OMP_IF_END]]
// CHECK21:       omp_if.end:
// CHECK21-NEXT:    ret void
//
//
// CHECK25-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv
// CHECK25-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK25-NEXT:  entry:
// CHECK25-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @_Z22parallel_master_copyinv.omp_outlined)
// CHECK25-NEXT:    ret void
//
//
// CHECK25-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv.omp_outlined
// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK25-NEXT:  entry:
// CHECK25-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK25-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK25-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK25-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK25-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK25-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK25-NEXT:    [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @a, i64 4, ptr @a.cache.)
// CHECK25-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
// CHECK25-NEXT:    [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @a to i64), [[TMP3]]
// CHECK25-NEXT:    br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]]
// CHECK25:       copyin.not.master:
// CHECK25-NEXT:    [[TMP5:%.*]] = load i32, ptr @a, align 4
// CHECK25-NEXT:    store i32 [[TMP5]], ptr [[TMP2]], align 4
// CHECK25-NEXT:    br label [[COPYIN_NOT_MASTER_END]]
// CHECK25:       copyin.not.master.end:
// CHECK25-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK25-NEXT:    [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK25-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK25-NEXT:    br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK25:       omp_if.then:
// CHECK25-NEXT:    [[TMP8:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @a, i64 4, ptr @a.cache.)
// CHECK25-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
// CHECK25-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK25-NEXT:    store i32 [[INC]], ptr [[TMP8]], align 4
// CHECK25-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK25-NEXT:    br label [[OMP_IF_END]]
// CHECK25:       omp_if.end:
// CHECK25-NEXT:    ret void
//
//
// CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv
// CHECK29-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK29-NEXT:  entry:
// CHECK29-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
// CHECK29-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @_Z22parallel_master_copyinv.omp_outlined, ptr [[TMP0]])
// CHECK29-NEXT:    ret void
//
//
// CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv.omp_outlined
// CHECK29-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK29-NEXT:  entry:
// CHECK29-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK29-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK29-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK29-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK29-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK29-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK29-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK29-NEXT:    [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
// CHECK29-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64
// CHECK29-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64
// CHECK29-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
// CHECK29-NEXT:    br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]]
// CHECK29:       copyin.not.master:
// CHECK29-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK29-NEXT:    store i32 [[TMP5]], ptr [[TMP1]], align 4
// CHECK29-NEXT:    br label [[COPYIN_NOT_MASTER_END]]
// CHECK29:       copyin.not.master.end:
// CHECK29-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK29-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
// CHECK29-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]])
// CHECK29-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK29-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
// CHECK29-NEXT:    [[TMP10:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[TMP9]])
// CHECK29-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK29-NEXT:    br i1 [[TMP11]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK29:       omp_if.then:
// CHECK29-NEXT:    [[TMP12:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
// CHECK29-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
// CHECK29-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK29-NEXT:    store i32 [[INC]], ptr [[TMP12]], align 4
// CHECK29-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[TMP9]])
// CHECK29-NEXT:    br label [[OMP_IF_END]]
// CHECK29:       omp_if.end:
// CHECK29-NEXT:    ret void
//
//
// CHECK29-LABEL: define {{[^@]+}}@_ZTW1a
// CHECK29-SAME: () #[[ATTR5:[0-9]+]] comdat {
// CHECK29-NEXT:    [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a)
// CHECK29-NEXT:    ret ptr [[TMP1]]
//