Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
brew install gnu-tar
echo "/usr/local/opt/gnu-tar/libexec/gnubin" >> $GITHUB_PATH

# Those two will also create target/${{ matrix.target }}/maturin
# Those two will also create target/maturin/maturin
- name: Build wheel (linux x86_64)
if: matrix.target == 'x86_64-unknown-linux-musl'
run: |
Expand Down Expand Up @@ -111,12 +111,17 @@ jobs:
- name: Archive binary (windows)
if: matrix.os == 'windows-latest'
run: |
cd target/${{ matrix.target }}/release
7z a ../../../${{ matrix.name }} ${{ github.event.repository.name }}.exe
7z a ${{ matrix.name }} target/maturin/${{ github.event.repository.name }}.exe

- name: Archive binary (linux)
if: matrix.os == 'ubuntu-latest'
run: |
cd target/maturin
tar czvf ../../${{ matrix.name }} ${{ github.event.repository.name }}
cd -

- name: Archive binary (linux and macOS)
if: matrix.os != 'windows-latest'
- name: Archive binary (macOS x86_64)
if: matrix.os == 'macos-latest'
run: |
cd target/${{ matrix.target }}/release
tar czvf ../../../${{ matrix.name }} ${{ github.event.repository.name }}
Expand Down Expand Up @@ -213,7 +218,7 @@ jobs:
--compatibility ${{ matrix.platform.compatibility }} \
--features password-storage
- name: Archive binary
run: tar czvf target/release/maturin-${{ matrix.platform.target }}.tar.gz -C target/${{ matrix.platform.target }}/release maturin
run: tar czvf target/release/maturin-${{ matrix.platform.target }}.tar.gz -C target/maturin maturin
- name: Upload wheel artifacts
# loongarch64 is not allowed to publish to pypi currently, see https://github.com/pypi/warehouse/blob/348117529910181cb8c14e9b5fb00fcf4dbaf07c/warehouse/forklift/legacy.py#L114-L185
if: ${{ !contains(fromJson('["loongarch64-unknown-linux-musl"]'), matrix.platform.target) }}
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ cargo-cyclonedx = { version = "0.5.7", optional = true }
pretty_assertions = { version = "1.3.0", optional = true }
which = { version = "8.0.0", optional = true }

memmap2 = "0.9.9"

[dev-dependencies]
expect-test = "1.4.1"
fs4 = { version = "0.13.1", features = ["fs-err3"] }
Expand Down
35 changes: 20 additions & 15 deletions src/build_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1048,32 +1048,37 @@ impl BuildContext {
.cloned()
.ok_or_else(|| anyhow!(error_msg,))?;

self.stage_artifact(&mut artifact)?;

if let Some(extension_name) = extension_name {
// globin has an issue parsing MIPS64 ELF, see https://github.com/m4b/goblin/issues/274
// goblin has an issue parsing MIPS64 ELF, see https://github.com/m4b/goblin/issues/274
// But don't fail the build just because we can't emit a warning
let _ = warn_missing_py_init(&artifact.path, extension_name);
}

self.copy_artifact_for_repair(&mut artifact)?;
Ok((artifact, result.out_dirs))
}

/// Copy an artifact to a staging directory so that auditwheel repair can
/// modify it in-place without altering the original cargo build output.
/// This prevents errors on subsequent rebuilds where cargo skips
/// recompilation.
/// Move an artifact to a private staging directory so that:
/// 1. `warn_missing_py_init` can safely mmap the file without risk of
/// concurrent modification by cargo / rust-analyzer.
/// 2. Auditwheel repair can modify it in-place without altering the
/// original cargo build output.
///
/// Only performs the copy when auditwheel mode is `Repair`, since that is
/// the only mode that modifies artifacts in-place.
fn copy_artifact_for_repair(&self, artifact: &mut BuildArtifact) -> Result<()> {
if self.editable || !matches!(self.auditwheel, AuditWheelMode::Repair) {
return Ok(());
}
/// Uses `fs::rename` for an atomic move (avoids copying multi-GB debug
/// artifacts), falling back to `fs::copy` when the rename fails (e.g.
/// cross-device).
fn stage_artifact(&self, artifact: &mut BuildArtifact) -> Result<()> {
let maturin_build = self.target_dir.join(env!("CARGO_PKG_NAME"));
fs::create_dir_all(&maturin_build)?;
let artifact_path = &artifact.path;
let new_artifact_path = maturin_build.join(artifact_path.file_name().unwrap());
fs::copy(artifact_path, &new_artifact_path)?;
// Remove any stale file at the destination so that `fs::rename` succeeds
// on Windows (where rename fails if the destination already exists).
let _ = fs::remove_file(&new_artifact_path);
if fs::rename(artifact_path, &new_artifact_path).is_err() {
// Rename fails across filesystem boundaries, fall back to copy
fs::copy(artifact_path, &new_artifact_path)?;
}
artifact.path = new_artifact_path.normalize()?.into_path_buf();
Ok(())
}
Expand Down Expand Up @@ -1322,7 +1327,7 @@ impl BuildContext {
policies.push(policy);
ext_libs.push(external_libs);

self.copy_artifact_for_repair(&mut artifact)?;
self.stage_artifact(&mut artifact)?;
artifact_paths.push(artifact);
}
let policy = policies.iter().min_by_key(|p| p.priority).unwrap();
Expand Down
12 changes: 7 additions & 5 deletions src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use fs_err::{self as fs, File};
use normpath::PathExt;
use std::collections::HashMap;
use std::env;
use std::io::{BufReader, Read};
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::str;
Expand Down Expand Up @@ -753,11 +753,13 @@ fn compile_target(
#[instrument(skip_all)]
pub fn warn_missing_py_init(artifact: &Path, module_name: &str) -> Result<()> {
let py_init = format!("PyInit_{module_name}");
let mut fd = File::open(artifact)?;
let mut buffer = Vec::new();
fd.read_to_end(&mut buffer)?;
let fd = File::open(artifact)?;
// SAFETY: The caller stages (moves/copies) the artifact into a private
// directory before invoking this function, so no concurrent process
// (e.g. cargo / rust-analyzer) can modify it while we have it mapped.
let mmap = unsafe { memmap2::Mmap::map(&fd).context("mmap failed")? };
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that sound, does it guarantee non of the rust invariants are being violated? If so, can you write a safety comment on why it it safe to do?

Copy link
Contributor Author

@orlp orlp Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@konstin Please see https://docs.rs/memmap2/0.9.9/memmap2/struct.Mmap.html#safety.

I'm not exactly sure what kind of safety comment you'd be looking for. It is technically inherently 'unsound' by the Rust abstract model if someone edits the file in the file system at the same time. But mmap is very widely used regardless, including in the Rust project itself, e.g. https://github.com/rust-lang/rust/blob/94a0cd15f5976fa35e5e6784e621c04e9f958e57/src/tools/rust-analyzer/crates/proc-macro-srv/src/dylib.rs#L110.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid I'm not comfortable with using mmap like that in maturin. For context, the only production usage of unsafe we have in maturin currently is env::set_var and that also only because it used to be safe in a previous edition.

Applications must consider the risk and take appropriate precautions when using file-backed maps. Solutions such as file permissions, locks or process-private (e.g. unlinked) files exist but are platform specific and limited.

As long as we can't guarantee that the artifact is not actually modified by some means, we risk UB, and that's a risk that's not worth it for this performance win.

Copy link
Contributor Author

@orlp orlp Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@konstin Which risks are you worried about? 'UB' is not a risk in of itself, it's a binary property of a program that states the abstract model of Rust was violated, a risk needs an actual attacker model, consequence and probability of exploitation.

Mmap is already widely used in the Rust compiler: https://github.com/search?q=repo%3Arust-lang%2Frust%20mmap&type=code. I'd really like to know which risks in particular you're worried about that are unacceptable for maturin but are apparently acceptable for the Rust compiler.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know which measure or soundness considerations the rust-lang/rust repo has around mmap, but the documentation you linked at https://docs.rs/memmap2/0.9.9/memmap2/struct.Mmap.html#safety is very clear that unless we guarantee that the underlying file doesn't change, this is unsound. I assume those mmap calls ensure this, but here I don't see that we've ensured that (yet).

With rust being safe by default, using unsafe inverts the language contract: We need to proof and document at the site where we use the unsafe that we're keeping up the contract, and should also document why the unsafe is necessary in the first place, usually with a // SAFETY: comment.

Copy link
Contributor Author

@orlp orlp Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@konstin "Measure of soundness" is the wrong measure. The API is fundamentally unsound. You can not prevent someone from mutating the file. There also is no alternative without re-writing the entire parser to be Read based rather than &[u8] based.

I'm perfectly aware of how Rust works with unsafe. You are however holding me to an impossible standard. There is no safety comment I could possibly write containing the contract we uphold, because we can not possibly guarantee that the contract is upheld. This is no different in the Rust compiler.

However, would you be satisfied with the following logic?

// SAFETY: technically this is unsound if the file gets modified while we hold the
// immutable &[u8] slice. However, considering we're reading a dynamic library
// with executable code intended to be run, should a malicious agent have write
// access to it, arbitrary code execution is already trivially achieved. So there
// is no reasonable attacker model where this would lead to exploits that are
// otherwise impossible.
let mmap = unsafe { memmap2::Mmap::map(&fd).context("mmap failed")? };

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant more in the sense of, can we ensure that this is not a file that changes underneath the mmap if you do another cargo build while maturin is running? As I understand it, there's a lot of ecosystem usage, where generally those can either assume that the file doesn't change underneath them (they hold a lock, it's a temp dir, they are the only process in that docker container, etc.) or the file changing isn't catastrophic. I know this is unlikely with cargo being slow and how this check is timed, but the uv issue tracker has scared me that every timing and non-determinism bug that can happen, will happen.

Copy link
Contributor Author

@orlp orlp Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we ensure that this is not a file that changes underneath the mmap if you do another cargo build while maturin is running?

No, you can not in general. And should this be a problem (it's not, really), you're already in the woods because you call

let dep_analyzer = DependencyAnalyzer::new(sysroot).library_paths(ld_paths);

which does... you guessed it, mmap:

https://github.com/messense/lddtree-rs/blob/e3ab19edf39b4d2d9cc90058e53a7ec966c271ac/src/lib.rs#L133

I must re-iterate that I feel you're holding me to an impossibly high standard for this PR, considering your tool already does the exact same strategy I proposed elsewhere (as well as your dependencies in the Rust compiler, probably others).

let mut found = false;
match goblin::Object::parse(&buffer)? {
match goblin::Object::parse(&mmap)? {
goblin::Object::Elf(elf) => {
for dyn_sym in elf.dynsyms.iter() {
if py_init == elf.dynstrtab[dyn_sym.st_name] {
Expand Down
18 changes: 5 additions & 13 deletions tests/common/pep517.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,10 @@ fn insert_path(env_var: &str, new_path: &Path) -> String {
.expect("PATH is not valid utf8")
}

/// Whether cargo built the shared library for the specified cargo profile in the test target
/// directory.
/// Whether cargo built for the specified cargo profile in the test target directory.
pub fn target_has_profile(unique_name: &str, profile: &str) -> bool {
let shared_library = if cfg!(windows) {
"pyo3_pure.dll"
} else if cfg!(target_os = "macos") {
"libpyo3_pure.dylib"
} else {
"libpyo3_pure.so"
};
PathBuf::from(target_dir(unique_name))
.join(profile)
.join(shared_library)
.is_file()
let profile_dir = PathBuf::from(target_dir(unique_name)).join(profile);
// Check for cargo's .fingerprint directory which is always created for the
// profile that was used, and is not affected by maturin's artifact staging.
profile_dir.join(".fingerprint").is_dir()
}
Loading