Skip to content

LLVM fails to select cmpxchg16b for 128-bit atomics on x86_64 #187503

@usamoi

Description

@usamoi

I'm working on rust-lang/rust#99069 and trying the following code:

#![feature(core_intrinsics)]
#![allow(internal_features)]

/// Performs a relaxed atomic load of the `u128` that `x` points to, using the
/// `atomic_load` core intrinsic.
///
/// The function is compiled with the `cmpxchg16b` target feature enabled and is
/// marked `#[inline]`, so it can be inlined into callers that do not enable
/// that feature themselves.
///
/// NOTE(review): presumably `x` must be valid for reads and 16-byte aligned
/// (the emitted IR uses `align 16`) — confirm against the intrinsic's contract.
#[inline]
#[target_feature(enable = "cmpxchg16b")]
fn load_internal(x: *const u128) -> u128 {
    use std::intrinsics::{AtomicOrdering, atomic_load};
    // Relaxed ordering; this shows up as `load atomic i128 ... monotonic` in the IR.
    unsafe { atomic_load::<u128, { AtomicOrdering::Relaxed }>(x) }
}

/// Unmangled entry point that simply forwards to `load_internal`.
///
/// Unlike `load_internal`, this function carries no `#[target_feature]`
/// attribute, so `cmpxchg16b` is not enabled for its own body.
#[unsafe(no_mangle)]
fn load_core(x: *const u128) -> u128 {
    // The callee enables a target feature this caller does not have, hence the `unsafe` call.
    unsafe { load_internal(x) }
}

/// Prints the address of `load_core`.
fn main() {
    // Cast chain: fn item -> `unsafe fn` pointer -> raw `*const ()`, so it can be
    // formatted with `{:#p}`. Presumably this reference also keeps `load_core`
    // in the final binary — TODO confirm.
    println!("load = {:#p}", load_core as unsafe fn(_) -> _ as *const ());
}

LLVM IR that rustc generates (cargo rustc --release -- --emit=llvm-ir -Cno-prepopulate-passes):

define internal noundef i128 @_RNvCsezRfsEAcE7i_2rr13load_internal(ptr noundef %x) unnamed_addr #3 {
start:
  %0 = alloca [16 x i8], align 16
  call void @llvm.lifetime.start.p0(ptr %0)
  %1 = load atomic i128, ptr %x monotonic, align 16
  store i128 %1, ptr %0, align 16
  %_0 = load i128, ptr %0, align 16
  call void @llvm.lifetime.end.p0(ptr %0)
  ret i128 %_0
}

define dso_local noundef i128 @load_core(ptr noundef %x) unnamed_addr #0 {
start:
; call rr::load_internal
  %_0 = call noundef i128 @_RNvCsezRfsEAcE7i_2rr13load_internal(ptr noundef %x)
  ret i128 %_0
}

attributes #0 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" "target-features"="+cx16" }

It's optimized to (cargo rustc --release -- --emit=llvm-ir):

define dso_local noundef i128 @load_core(ptr noundef readonly captures(none) %x) unnamed_addr #3 {
start:
  %0 = load atomic i128, ptr %x monotonic, align 16
  ret i128 %0
}

attributes #3 = { mustprogress nofree norecurse nounwind nonlazybind willreturn memory(argmem: readwrite) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }

and finally it causes a linking error: __atomic_load_16 is undefined, because LLVM forgets that cx16 is available after inlining and selects the compiler intrinsic instead of cmpxchg16b.

The function load_internal is already marked with the cx16 feature, so cmpxchg16b is available. The `load atomic` instruction inside the function body should be compiled into cmpxchg16b rather than a call to __atomic_load_16, regardless of its caller.

Godbolt link of LLVM IR that rustc generates: https://godbolt.org/z/zf6qYWTdK (without -O3 it works)

Metadata

Metadata

Assignees

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions