Skip to content

Commit 1c28bd8

Browse files
committed
update testcases
1 parent ec9cce4 commit 1c28bd8

2 files changed

Lines changed: 49 additions & 27 deletions

File tree

tests/codegen-llvm/gpu_offload/control_flow.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
// CHECK: br label %bb3
2020
// CHECK-NOT: define
2121
// CHECK: bb3
22-
// CHECK: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
22+
// CHECK: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo.begin, ptr null, ptr null)
2323
// CHECK: {{%[^ ]+}} = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 256, i32 32, ptr nonnull @.foo.region_id, ptr nonnull %kernel_args)
24-
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
24+
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo.end, ptr null, ptr null)
2525
#[unsafe(no_mangle)]
2626
unsafe fn main() {
2727
let A = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0];

tests/codegen-llvm/gpu_offload/gpu_host.rs

Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,23 @@
1414
#[unsafe(no_mangle)]
1515
fn main() {
1616
let mut x = [3.0; 256];
17-
kernel_1(&mut x);
17+
let y = [1.0; 256];
18+
kernel_1(&mut x, &y);
1819
core::hint::black_box(&x);
20+
core::hint::black_box(&y);
1921
}
2022

21-
pub fn kernel_1(x: &mut [f32; 256]) {
22-
core::intrinsics::offload(kernel_1, [256, 1, 1], [32, 1, 1], (x,))
23+
#[unsafe(no_mangle)]
24+
#[inline(never)]
25+
pub fn kernel_1(x: &mut [f32; 256], y: &[f32; 256]) {
26+
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x, y))
2327
}
2428

2529
#[unsafe(no_mangle)]
2630
#[inline(never)]
27-
pub fn _kernel_1(x: &mut [f32; 256]) {
31+
pub fn _kernel_1(x: &mut [f32; 256], y: &[f32; 256]) {
2832
for i in 0..256 {
29-
x[i] = 21.0;
33+
x[i] = 21.0 + y[i];
3034
}
3135
}
3236

@@ -37,42 +41,60 @@ pub fn _kernel_1(x: &mut [f32; 256]) {
3741
// CHECK: @anon.[[ID:.*]].0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
3842
// CHECK: @anon.{{.*}}.1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @anon.[[ID]].0 }, align 8
3943

40-
// CHECK-DAG: @.omp_offloading.descriptor = internal constant { i32, ptr, ptr, ptr } zeroinitializer
41-
// CHECK-DAG: @llvm.global_ctors = appending constant [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
42-
// CHECK-DAG: @.offload_sizes.[[K:[^ ]*kernel_1]] = private unnamed_addr constant [1 x i64] [i64 1024]
43-
// CHECK-DAG: @.offload_maptypes.[[K]] = private unnamed_addr constant [1 x i64] [i64 35]
44-
// CHECK-DAG: @.[[K]].region_id = internal constant i8 0
45-
// CHECK-DAG: @.offloading.entry_name.[[K]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[K]]{{\\00}}", section ".llvm.rodata.offloading", align 1
46-
// CHECK-DAG: @.offloading.entry.[[K]] = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.[[K]].region_id, ptr @.offloading.entry_name.[[K]], i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
44+
// CHECK-DAG: @.offload_sizes._kernel_1 = private unnamed_addr constant [2 x i64] [i64 1024, i64 1024]
45+
// CHECK-DAG: @.offload_maptypes._kernel_1.begin = private unnamed_addr constant [2 x i64] [i64 1, i64 1]
46+
// CHECK-DAG: @.offload_maptypes._kernel_1.kernel = private unnamed_addr constant [2 x i64] [i64 32, i64 32]
47+
// CHECK-DAG: @.offload_maptypes._kernel_1.end = private unnamed_addr constant [2 x i64] [i64 2, i64 0]
48+
// CHECK-DAG: @._kernel_1.region_id = internal constant i8 0
49+
// CHECK-DAG: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
50+
// CHECK-DAG: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
4751

4852
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
4953

5054
// CHECK-LABEL: define{{( dso_local)?}} void @main()
5155
// CHECK-NEXT: start:
52-
// CHECK-NEXT: %0 = alloca [8 x i8], align 8
53-
// CHECK-NEXT: %x = alloca [1024 x i8], align 16
54-
// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
55-
// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
56-
// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
56+
// CHECK-NEXT: %0 = alloca [8 x i8], align 8
57+
// CHECK-NEXT: %1 = alloca [8 x i8], align 8
58+
// CHECK-NEXT: %y = alloca [1024 x i8], align 16
59+
// CHECK-NEXT: %x = alloca [1024 x i8], align 16
60+
// CHECK: call void @kernel_1(ptr {{.*}} %x, ptr {{.*}} %y)
61+
// CHECK: store ptr %x, ptr %1, align 8
62+
// CHECK: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1)
63+
// CHECK: store ptr %y, ptr %0, align 8
64+
// CHECK: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0)
65+
// CHECK: ret void
66+
// CHECK-NEXT: }
67+
68+
// CHECK-LABEL: define{{( dso_local)?}} void @kernel_1(ptr noalias noundef align 4 dereferenceable(1024) %x, ptr noalias noundef readonly align 4 captures(address, read_provenance) dereferenceable(1024) %y)
69+
// CHECK-NEXT: start:
70+
// CHECK-NEXT: %.offload_baseptrs = alloca [2 x ptr], align 8
71+
// CHECK-NEXT: %.offload_ptrs = alloca [2 x ptr], align 8
72+
// CHECK-NEXT: %.offload_sizes = alloca [2 x i64], align 8
5773
// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
58-
// CHECK: %dummy = load volatile ptr, ptr @.offload_sizes.[[K]], align 8
59-
// CHECK-NEXT: %dummy1 = load volatile ptr, ptr @.offloading.entry.[[K]], align 8
74+
// CHECK-NEXT: %dummy = load volatile ptr, ptr @.offload_sizes._kernel_1, align 8
75+
// CHECK-NEXT: %dummy1 = load volatile ptr, ptr @.offloading.entry._kernel_1, align 8
6076
// CHECK-NEXT: call void @__tgt_init_all_rtls()
6177
// CHECK-NEXT: store ptr %x, ptr %.offload_baseptrs, align 8
6278
// CHECK-NEXT: store ptr %x, ptr %.offload_ptrs, align 8
6379
// CHECK-NEXT: store i64 1024, ptr %.offload_sizes, align 8
64-
// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.[[K]], ptr null, ptr null)
80+
// CHECK-NEXT: [[BPTRS_1:%.*]] = getelementptr inbounds nuw i8, ptr %.offload_baseptrs, i64 8
81+
// CHECK-NEXT: store ptr %y, ptr [[BPTRS_1]], align 8
82+
// CHECK-NEXT: [[PTRS_1:%.*]] = getelementptr inbounds nuw i8, ptr %.offload_ptrs, i64 8
83+
// CHECK-NEXT: store ptr %y, ptr [[PTRS_1]], align 8
84+
// CHECK-NEXT: [[SIZES_1:%.*]] = getelementptr inbounds nuw i8, ptr %.offload_sizes, i64 8
85+
// CHECK-NEXT: store i64 1024, ptr [[SIZES_1]], align 8
86+
// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1.begin, ptr null, ptr null)
6587
// CHECK-NEXT: store i32 3, ptr %kernel_args, align 8
6688
// CHECK-NEXT: [[P4:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 4
67-
// CHECK-NEXT: store i32 1, ptr [[P4]], align 4
89+
// CHECK-NEXT: store i32 2, ptr [[P4]], align 4
6890
// CHECK-NEXT: [[P8:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 8
6991
// CHECK-NEXT: store ptr %.offload_baseptrs, ptr [[P8]], align 8
7092
// CHECK-NEXT: [[P16:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 16
7193
// CHECK-NEXT: store ptr %.offload_ptrs, ptr [[P16]], align 8
7294
// CHECK-NEXT: [[P24:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 24
7395
// CHECK-NEXT: store ptr %.offload_sizes, ptr [[P24]], align 8
7496
// CHECK-NEXT: [[P32:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 32
75-
// CHECK-NEXT: store ptr @.offload_maptypes.[[K]], ptr [[P32]], align 8
97+
// CHECK-NEXT: store ptr @.offload_maptypes._kernel_1.kernel, ptr [[P32]], align 8
7698
// CHECK-NEXT: [[P40:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 40
7799
// CHECK-NEXT: [[P72:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 72
78100
// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) [[P40]], i8 0, i64 32, i1 false)
@@ -83,9 +105,9 @@ pub fn _kernel_1(x: &mut [f32; 256]) {
83105
// CHECK-NEXT: store i32 1, ptr [[P92]], align 4
84106
// CHECK-NEXT: [[P96:%[^ ]+]] = getelementptr inbounds nuw i8, ptr %kernel_args, i64 96
85107
// CHECK-NEXT: store i32 0, ptr [[P96]], align 8
86-
// CHECK-NEXT: {{%[^ ]+}} = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 256, i32 32, ptr nonnull @.[[K]].region_id, ptr nonnull %kernel_args)
87-
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.[[K]], ptr null, ptr null)
88-
// CHECK: ret void
108+
// CHECK-NEXT: [[TGT_RET:%.*]] = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 256, i32 32, ptr nonnull @._kernel_1.region_id, ptr nonnull %kernel_args)
109+
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1.end, ptr null, ptr null)
110+
// CHECK-NEXT: ret void
89111
// CHECK-NEXT: }
90112

91113
// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr

0 commit comments

Comments
 (0)