// Diff hunk for the test entry point `main`: the old single-argument call `kernel_1(&mut x)` is
// removed; the new version also creates an immutable array `y` (256 elements of 1.0), passes it
// alongside `x`, and hands both arrays to `core::hint::black_box` after the call.
1414#[ unsafe( no_mangle) ]
1515fn main ( ) {
1616 let mut x = [ 3.0 ; 256 ] ;
17- kernel_1 ( & mut x) ;
17+ let y = [ 1.0 ; 256 ] ;
18+ kernel_1 ( & mut x, & y) ;
1819 core:: hint:: black_box ( & x) ;
20+ core:: hint:: black_box ( & y) ;
1921}
2022
// Diff hunk for the host-side wrapper `kernel_1`: it gains a second parameter `y`, is now marked
// `#[unsafe(no_mangle)]` and `#[inline(never)]`, and passes `_kernel_1` (previously `kernel_1`
// itself) to `core::intrinsics::offload` together with the argument tuple `(x, y)`.
// NOTE(review): the `[256, 1, 1]` and `[32, 1, 1]` arrays are presumably launch dimensions -- confirm
// against the intrinsic's documentation.
21- pub fn kernel_1 ( x : & mut [ f32 ; 256 ] ) {
22- core:: intrinsics:: offload ( kernel_1, [ 256 , 1 , 1 ] , [ 32 , 1 , 1 ] , ( x, ) )
23+ #[ unsafe( no_mangle) ]
24+ #[ inline( never) ]
25+ pub fn kernel_1 ( x : & mut [ f32 ; 256 ] , y : & [ f32 ; 256 ] ) {
26+ core:: intrinsics:: offload ( _kernel_1, [ 256 , 1 , 1 ] , [ 32 , 1 , 1 ] , ( x, y) )
2327}
2428
// Diff hunk for `_kernel_1`: the function gains a second, read-only parameter `y`, and the loop
// over indices 0..256 now stores `21.0 + y[i]` into `x[i]` instead of the constant `21.0`.
2529#[ unsafe( no_mangle) ]
2630#[ inline( never) ]
27- pub fn _kernel_1 ( x : & mut [ f32 ; 256 ] ) {
31+ pub fn _kernel_1 ( x : & mut [ f32 ; 256 ] , y : & [ f32 ; 256 ] ) {
2832 for i in 0 ..256 {
29- x[ i] = 21.0 ;
33+ x[ i] = 21.0 + y [ i ] ;
3034 }
3135}
3236
@@ -37,42 +41,60 @@ pub fn _kernel_1(x: &mut [f32; 256]) {
3741// CHECK: @anon.[[ID:.*]].0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
3842// CHECK: @anon.{{.*}}.1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @anon.[[ID]].0 }, align 8
3943
40- // CHECK-DAG: @.omp_offloading.descriptor = internal constant { i32, ptr, ptr, ptr } zeroinitializer
41- // CHECK-DAG: @llvm.global_ctors = appending constant [1 x { i32, ptr, ptr } ] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null } ]
42- // CHECK-DAG: @.offload_sizes.[[K:[^ ]*kernel_1]] = private unnamed_addr constant [1 x i64] [i64 1024 ]
43- // CHECK-DAG: @.offload_maptypes.[[K]] = private unnamed_addr constant [1 x i64] [i64 35 ]
44- // CHECK-DAG: @.[[K]] .region_id = internal constant i8 0
45- // CHECK-DAG: @.offloading.entry_name.[[K]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[K]]{{\\00}} ", section ".llvm.rodata.offloading", align 1
46- // CHECK-DAG: @.offloading.entry.[[K]] = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.[[K]] .region_id, ptr @.offloading.entry_name.[[K]] , i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
44+ // CHECK-DAG: @.offload_sizes._kernel_1 = private unnamed_addr constant [2 x i64] [i64 1024, i64 1024]
45+ // CHECK-DAG: @.offload_maptypes._kernel_1.begin = private unnamed_addr constant [2 x i64] [i64 1, i64 1]
46+ // CHECK-DAG: @.offload_maptypes._kernel_1.kernel = private unnamed_addr constant [2 x i64] [i64 32, i64 32]
47+ // CHECK-DAG: @.offload_maptypes._kernel_1.end = private unnamed_addr constant [2 x i64] [i64 2, i64 0]
48+ // CHECK-DAG: @._kernel_1.region_id = internal constant i8 0
49+ // CHECK-DAG: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
50+ // CHECK-DAG: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
4751
4852// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
4953
5054// CHECK-LABEL: define{{( dso_local)?}} void @main()
5155// CHECK-NEXT: start:
52- // CHECK-NEXT: %0 = alloca [8 x i8], align 8
53- // CHECK-NEXT: %x = alloca [1024 x i8], align 16
54- // CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
55- // CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
56- // CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
56+ // CHECK-NEXT: %0 = alloca [8 x i8], align 8
57+ // CHECK-NEXT: %1 = alloca [8 x i8], align 8
58+ // CHECK-NEXT: %y = alloca [1024 x i8], align 16
59+ // CHECK-NEXT: %x = alloca [1024 x i8], align 16
60+ // CHECK: call void @kernel_1(ptr {{.*}} %x, ptr {{.*}} %y)
61+ // CHECK: store ptr %x, ptr %1, align 8
62+ // CHECK: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1)
63+ // CHECK: store ptr %y, ptr %0, align 8
64+ // CHECK: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0)
65+ // CHECK: ret void
66+ // CHECK-NEXT: }
67+
68+ // CHECK-LABEL: define{{( dso_local)?}} void @kernel_1(ptr noalias noundef align 4 dereferenceable(1024) %x, ptr noalias noundef readonly align 4 captures(address, read_provenance) dereferenceable(1024) %y)
69+ // CHECK-NEXT: start:
70+ // CHECK-NEXT: %.offload_baseptrs = alloca [2 x ptr], align 8
71+ // CHECK-NEXT: %.offload_ptrs = alloca [2 x ptr], align 8
72+ // CHECK-NEXT: %.offload_sizes = alloca [2 x i64], align 8
5773// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
58- // CHECK: %dummy = load volatile ptr, ptr @.offload_sizes.[[K]] , align 8
59- // CHECK-NEXT: %dummy1 = load volatile ptr, ptr @.offloading.entry.[[K]] , align 8
74+ // CHECK-NEXT: %dummy = load volatile ptr, ptr @.offload_sizes._kernel_1, align 8
75+ // CHECK-NEXT: %dummy1 = load volatile ptr, ptr @.offloading.entry._kernel_1, align 8
6076// CHECK-NEXT: call void @__tgt_init_all_rtls()
6177// CHECK-NEXT: store ptr %x, ptr %.offload_baseptrs, align 8
6278// CHECK-NEXT: store ptr %x, ptr %.offload_ptrs, align 8
6379// CHECK-NEXT: store i64 1024, ptr %.offload_sizes, align 8
64- // CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.[[K]], ptr null, ptr null)
80+ // CHECK-NEXT: [[BPTRS_0:%.*]] = getelementptr inbounds nuw i8, ptr %.offload_baseptrs, i64 8
81+ // CHECK-NEXT: store ptr %y, ptr [[BPTRS_0]], align 8
82+ // CHECK-NEXT: [[PTRS_1:%.*]] = getelementptr inbounds nuw i8, ptr %.offload_ptrs, i64 8
83+ // CHECK-NEXT: store ptr %y, ptr [[PTRS_1]], align 8
84+ // CHECK-NEXT: [[SIZES_1:%.*]] = getelementptr inbounds nuw i8, ptr %.offload_sizes, i64 8
85+ // CHECK-NEXT: store i64 1024, ptr [[SIZES_1]], align 8
86+ // CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1.begin, ptr null, ptr null)
6587// CHECK-NEXT: store i32 3, ptr %kernel_args, align 8
6688// CHECK-NEXT: [[P4:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 4
67- // CHECK-NEXT: store i32 1 , ptr [[P4]], align 4
89+ // CHECK-NEXT: store i32 2, ptr [[P4]], align 4
6890// CHECK-NEXT: [[P8:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 8
6991// CHECK-NEXT: store ptr %.offload_baseptrs, ptr [[P8]], align 8
7092// CHECK-NEXT: [[P16:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 16
7193// CHECK-NEXT: store ptr %.offload_ptrs, ptr [[P16]], align 8
7294// CHECK-NEXT: [[P24:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 24
7395// CHECK-NEXT: store ptr %.offload_sizes, ptr [[P24]], align 8
7496// CHECK-NEXT: [[P32:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 32
75- // CHECK-NEXT: store ptr @.offload_maptypes.[[K]] , ptr [[P32]], align 8
97+ // CHECK-NEXT: store ptr @.offload_maptypes._kernel_1.kernel, ptr [[P32]], align 8
7698// CHECK-NEXT: [[P40:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 40
7799// CHECK-NEXT: [[P72:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 72
78100// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) [[P40]], i8 0, i64 32, i1 false)
@@ -83,9 +105,9 @@ pub fn _kernel_1(x: &mut [f32; 256]) {
83105// CHECK-NEXT: store i32 1, ptr [[P92]], align 4
84106// CHECK-NEXT: [[P96:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 96
85107// CHECK-NEXT: store i32 0, ptr [[P96]], align 8
86- // CHECK-NEXT: {{%[^ ]+}} = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 256, i32 32, ptr nonnull @.[[K]] .region_id, ptr nonnull %kernel_args)
87- // CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1 , ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.[[K]] , ptr null, ptr null)
88- // CHECK: ret void
108+ // CHECK-NEXT: [[TGT_RET:%.*]] = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 256, i32 32, ptr nonnull @._kernel_1.region_id, ptr nonnull %kernel_args)
109+ // CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1.end, ptr null, ptr null)
110+ // CHECK-NEXT: ret void
89111// CHECK-NEXT: }
90112
91113// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
0 commit comments