From dd99465ad68639793ead23fbd400a4f2e8820e96 Mon Sep 17 00:00:00 2001 From: Shi Zhang Date: Thu, 9 Apr 2026 14:37:46 -0700 Subject: [PATCH 1/2] change file save buffer size and use external buffer to avoid neighbor buffer allocation for each vertex when saving --- .../async_/simple_neighbor_provider.rs | 47 ++++++++++++++++ diskann-providers/src/storage/bin.rs | 56 ++++++++++++++----- 2 files changed, 88 insertions(+), 15 deletions(-) diff --git a/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs b/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs index 2584caa4f..5b70df823 100644 --- a/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs +++ b/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs @@ -288,6 +288,27 @@ impl storage::bin::GetAdjacencyList for SimpleNeighborProviderAsync { Ok(list) } + /// Optimized version that reuses a pre-allocated buffer to avoid per-call allocation. + /// + /// This directly copies the adjacency list into the provided buffer, avoiding the + /// intermediate `AdjacencyList` allocation that `get_adjacency_list` requires. + fn get_adjacency_list_into(&self, i: usize, buffer: &mut Vec) -> ANNResult<()> { + #[cfg(test)] + self.num_get_calls.increment(); + + // Lint: We don't have a good way of recovering from lock poisoning anyways. + #[allow(clippy::unwrap_used)] + let _guard = self.locks[i].read().unwrap(); + + // SAFETY: We are holding the read lock for `i`. + let list = unsafe { self.get_slice(i) }; + + // Reuse buffer: clear and copy data directly + buffer.clear(); + buffer.extend_from_slice(list); + Ok(()) + } + fn total(&self) -> usize { self.locks.len() } @@ -346,6 +367,32 @@ impl storage::bin::GetAdjacencyList for DiskAdaptor<'_> { Ok(list) } + /// Optimized version that reuses a pre-allocated buffer to avoid per-call allocation. + /// + /// This directly reads neighbors into the buffer and performs the start point remapping + /// in-place, avoiding the intermediate `AdjacencyList` allocation. + fn get_adjacency_list_into(&self, i: usize, buffer: &mut Vec) -> ANNResult<()> { + // Lint: We don't have a good way of recovering from lock poisoning anyways. + #[allow(clippy::unwrap_used)] + let _guard = self.provider.locks[i].read().unwrap(); + + // SAFETY: We are holding the read lock for `i`. + let list = unsafe { self.provider.get_slice(i) }; + + // Reuse buffer: clear and copy data directly + buffer.clear(); + buffer.extend_from_slice(list); + + // Remap the in-memory start point to the actual start point + for id in buffer.iter_mut() { + if *id == self.inmem_start_point { + *id = self.actual_start_point; + } + } + + Ok(()) + } + fn total(&self) -> usize { // Don't include any start points at the end. self.provider.locks.len() - self.provider.num_start_points diff --git a/diskann-providers/src/storage/bin.rs b/diskann-providers/src/storage/bin.rs index bf1f89d14..ffc3a8e80 100644 --- a/diskann-providers/src/storage/bin.rs +++ b/diskann-providers/src/storage/bin.rs @@ -15,6 +15,12 @@ use diskann_utils::io::Metadata; use crate::{model::graph::traits::AdHoc, utils::load_metadata_from_file}; +/// Default buffer size for save operations (16 MB). +/// +/// This larger buffer reduces the number of system calls during serialization, +/// improving write throughput especially for large graphs and data files. +const SAVE_BUFFER_SIZE: usize = 16 * 1024 * 1024; + /// An simplified adaptor interface for allowing providers to use and [`load_graph`]. /// /// These traits are meant for IO purposes and are not meant as general access traits for @@ -100,6 +106,23 @@ pub(crate) trait GetAdjacencyList { /// Retrieve the data stored at index `i`. fn get_adjacency_list(&self, i: usize) -> ANNResult>; + /// Retrieve the data stored at index `i` into a pre-allocated buffer. + /// + /// This method allows callers to reuse a buffer across multiple calls, + /// avoiding per-call memory allocation overhead. The buffer is cleared + /// before being populated with the adjacency list data. + /// + /// Default implementation falls back to `get_adjacency_list` and copies. + fn get_adjacency_list_into(&self, i: usize, buffer: &mut Vec) -> ANNResult<()> + where + Self::Element: Clone, + { + buffer.clear(); + let list = self.get_adjacency_list(i)?; + buffer.extend_from_slice(&list); + Ok(()) + } + /// Return the total number of elements contained in `self`. fn total(&self) -> usize; @@ -183,7 +206,9 @@ where let total = data.total(); let dim = data.dim(); - let mut writer = provider.create_for_write(path)?; + let file = provider.create_for_write(path)?; + // Use 16 MB buffer for efficient I/O + let mut writer = BufWriter::with_capacity(SAVE_BUFFER_SIZE, file); let mut points_written: u32 = 0; Metadata::new(points_written, dim)?.write(&mut writer)?; @@ -338,37 +363,38 @@ where { let file = provider.create_for_write(path)?; - let mut out = BufWriter::new(file); + // Use 16 MB buffer to reduce system call overhead + let mut out = BufWriter::with_capacity(SAVE_BUFFER_SIZE, file); let mut index_size: u64 = 24; let mut observed_max_degree: u32 = 0; out.write_all(&index_size.to_le_bytes())?; - out.write_all(&observed_max_degree.to_le_bytes())?; // Will be updated later with correct max_degree + out.write_all(&observed_max_degree.to_le_bytes())?; out.write_all(&start_point.to_le_bytes())?; - out.write_all(&graph.additional_points().to_le_bytes())?; + let total = graph.total(); + // Pre-allocate a reusable buffer for adjacency lists + let initial_capacity = graph.max_degree().map(|d| d as usize).unwrap_or(128); + let mut neighbor_buffer: Vec = Vec::with_capacity(initial_capacity); + for i in 0..total { - let binding = graph.get_adjacency_list(i)?; - let neighbors: &[u32] = &binding; - let num_neighbors: u32 = neighbors.len() as u32; + // Reuse buffer to avoid per-vertex allocation overhead + graph.get_adjacency_list_into(i, &mut neighbor_buffer)?; + let num_neighbors: u32 = neighbor_buffer.len() as u32; - // Write the number of neighbors as a `u32`. out.write_all(&num_neighbors.to_le_bytes())?; - // Write all the neighbors, applying transformation if provided. - neighbors - .iter() - .copied() - .try_for_each(|n| out.write_all(&n.to_le_bytes()))?; + // Bulk write using bytemuck for zero-copy conversion + let neighbor_bytes: &[u8] = bytemuck::must_cast_slice(&neighbor_buffer); + out.write_all(neighbor_bytes)?; observed_max_degree = observed_max_degree.max(num_neighbors); - index_size += (std::mem::size_of::() * (1 + neighbors.len())) as u64; + index_size += (std::mem::size_of::() * (1 + neighbor_buffer.len())) as u64; } - // Use configured max degree if provided, otherwise use observed let max_degree = graph.max_degree().unwrap_or(observed_max_degree); // Finish up by writing the observed index size and max degree. From 9130420f83fd547b4d081ec4a15fefb51a36b3d2 Mon Sep 17 00:00:00 2001 From: Shi Zhang Date: Fri, 17 Apr 2026 16:30:30 -0700 Subject: [PATCH 2/2] fix comments --- .../provider/async_/simple_neighbor_provider.rs | 1 - diskann-providers/src/storage/bin.rs | 13 ++----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs b/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs index 5b70df823..7c59d89e6 100644 --- a/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs +++ b/diskann-providers/src/model/graph/provider/async_/simple_neighbor_provider.rs @@ -297,7 +297,6 @@ impl storage::bin::GetAdjacencyList for SimpleNeighborProviderAsync { self.num_get_calls.increment(); // Lint: We don't have a good way of recovering from lock poisoning anyways. - #[allow(clippy::unwrap_used)] let _guard = self.locks[i].read().unwrap(); // SAFETY: We are holding the read lock for `i`. diff --git a/diskann-providers/src/storage/bin.rs b/diskann-providers/src/storage/bin.rs index ffc3a8e80..5ec320d84 100644 --- a/diskann-providers/src/storage/bin.rs +++ b/diskann-providers/src/storage/bin.rs @@ -15,12 +15,6 @@ use diskann_utils::io::Metadata; use crate::{model::graph::traits::AdHoc, utils::load_metadata_from_file}; -/// Default buffer size for save operations (16 MB). -/// -/// This larger buffer reduces the number of system calls during serialization, -/// improving write throughput especially for large graphs and data files. -const SAVE_BUFFER_SIZE: usize = 16 * 1024 * 1024; - /// An simplified adaptor interface for allowing providers to use and [`load_graph`]. /// /// These traits are meant for IO purposes and are not meant as general access traits for @@ -206,9 +200,7 @@ where let total = data.total(); let dim = data.dim(); - let file = provider.create_for_write(path)?; - // Use 16 MB buffer for efficient I/O - let mut writer = BufWriter::with_capacity(SAVE_BUFFER_SIZE, file); + let mut writer = provider.create_for_write(path)?; let mut points_written: u32 = 0; Metadata::new(points_written, dim)?.write(&mut writer)?; @@ -363,8 +355,7 @@ where { let file = provider.create_for_write(path)?; - // Use 16 MB buffer to reduce system call overhead - let mut out = BufWriter::with_capacity(SAVE_BUFFER_SIZE, file); + let mut out = BufWriter::new(file); let mut index_size: u64 = 24; let mut observed_max_degree: u32 = 0;