I'm working on rust-lang/rust#99069, and trying the following code:
#![feature(core_intrinsics)]
#![allow(internal_features)]
/// Performs a relaxed atomic load of the `u128` that `x` points to, using the
/// unstable `atomic_load` intrinsic.
///
/// The `cmpxchg16b` target feature is enabled for this function only; it is
/// also marked `#[inline]`, so it is a candidate for inlining into callers
/// that do not enable that feature themselves.
#[inline]
#[target_feature(enable = "cmpxchg16b")]
fn load_internal(x: *const u128) -> u128 {
use std::intrinsics::{AtomicOrdering, atomic_load};
// SAFETY: NOTE(review): nothing here validates `x`; presumably the caller must
// pass a valid, suitably aligned pointer to a `u128` — confirm.
unsafe { atomic_load::<u128, { AtomicOrdering::Relaxed }>(x) }
}
/// Forwards to `load_internal` and returns its result.
///
/// Exported under the unmangled symbol name `load_core`. Unlike
/// `load_internal`, this function carries no `#[target_feature]` attribute.
#[unsafe(no_mangle)]
fn load_core(x: *const u128) -> u128 {
// SAFETY: NOTE(review): the callee enables `cmpxchg16b` but this caller does
// not, and nothing here checks that the running CPU supports it — confirm.
unsafe { load_internal(x) }
}
/// Prints the address of `load_core`.
fn main() {
// Casts `load_core` to a function pointer and then to `*const ()` so it can be
// formatted with `{:#p}`; presumably this also keeps `load_core` referenced in
// the final binary — TODO confirm.
println!("load = {:#p}", load_core as unsafe fn(_) -> _ as *const ());
}
LLVM IR that rustc generates (cargo rustc --release -- --emit=llvm-ir -Cno-prepopulate-passes):
define internal noundef i128 @_RNvCsezRfsEAcE7i_2rr13load_internal(ptr noundef %x) unnamed_addr #3 {
start:
%0 = alloca [16 x i8], align 16
call void @llvm.lifetime.start.p0(ptr %0)
%1 = load atomic i128, ptr %x monotonic, align 16
store i128 %1, ptr %0, align 16
%_0 = load i128, ptr %0, align 16
call void @llvm.lifetime.end.p0(ptr %0)
ret i128 %_0
}
define dso_local noundef i128 @load_core(ptr noundef %x) unnamed_addr #0 {
start:
; call rr::load_internal
%_0 = call noundef i128 @_RNvCsezRfsEAcE7i_2rr13load_internal(ptr noundef %x)
ret i128 %_0
}
attributes #0 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" "target-features"="+cx16" }
It's optimized to (cargo rustc --release -- --emit=llvm-ir):
define dso_local noundef i128 @load_core(ptr noundef readonly captures(none) %x) unnamed_addr #3 {
start:
%0 = load atomic i128, ptr %x monotonic, align 16
ret i128 %0
}
attributes #3 = { mustprogress nofree norecurse nounwind nonlazybind willreturn memory(argmem: readwrite) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
and finally it causes a linking error: __atomic_load_16 is undefined, because LLVM forgets that cx16 is available after inlining and selects the compiler intrinsic instead of cmpxchg16b.
The function load_internal is already marked with the cx16 feature, so cmpxchg16b is available. load atomic inside the function body should be compiled into cmpxchg16b rather than a call to __atomic_load_16, regardless of its caller.
Godbolt link of LLVM IR that rustc generates: https://godbolt.org/z/zf6qYWTdK (without -O3 it works)
I'm working on rust-lang/rust#99069, and trying
LLVM IR that rustc generates (`cargo rustc --release -- --emit=llvm-ir -Cno-prepopulate-passes`):
It's optimized to (`cargo rustc --release -- --emit=llvm-ir`):
and finally it causes a linking error: `__atomic_load_16` is undefined, because LLVM forgets that `cx16` is available after inlining and selects the compiler intrinsic instead of `cmpxchg16b`.
The function `load_internal` is already marked with the `cx16` feature, so `cmpxchg16b` is available. `load atomic` inside the function body should be compiled into `cmpxchg16b` rather than a call to `__atomic_load_16`, regardless of its caller.
Godbolt link of LLVM IR that rustc generates: https://godbolt.org/z/zf6qYWTdK (without `-O3` it works)