Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,15 @@ add_library(
src/mr/detail/fixed_size_memory_resource_impl.cpp
src/mr/detail/logging_resource_adaptor_impl.cpp
src/mr/detail/logging_resource_adaptor_impl.cpp
src/mr/detail/limiting_resource_adaptor_impl.cpp
src/mr/detail/pool_memory_resource_impl.cpp
src/mr/detail/pool_memory_resource_impl.cpp
src/mr/detail/prefetch_resource_adaptor_impl.cpp
src/mr/detail/statistics_resource_adaptor_impl.cpp
src/mr/detail/tracking_resource_adaptor_impl.cpp
src/mr/detail/thread_safe_resource_adaptor_impl.cpp
src/mr/fixed_size_memory_resource.cpp
src/mr/limiting_resource_adaptor.cpp
src/mr/logging_resource_adaptor.cpp
src/mr/pool_memory_resource.cpp
src/mr/prefetch_resource_adaptor.cpp
Expand Down
85 changes: 85 additions & 0 deletions cpp/include/rmm/mr/detail/limiting_resource_adaptor_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once

#include <rmm/detail/export.hpp>
#include <rmm/resource_ref.hpp>

#include <cuda/memory_resource>

#include <atomic>
#include <cstddef>
#include <memory>

namespace RMM_NAMESPACE {
namespace mr {
namespace detail {

/**
* @brief Implementation class for limiting_resource_adaptor.
*
* Limits the total bytes allocatable through the upstream resource. This class
* satisfies the CCCL `cuda::mr::resource` concept and is held by
* `limiting_resource_adaptor` via `cuda::mr::shared_resource` for
* reference-counted ownership.
*/
class limiting_resource_adaptor_impl {
 public:
  /**
   * @brief Construct an impl that limits total bytes allocated through `upstream`.
   *
   * @param upstream Resource used to satisfy allocation requests
   * @param allocation_limit Maximum total bytes allocatable through this impl
   * @param alignment Alignment, in bytes, used when accounting allocation sizes
   */
  limiting_resource_adaptor_impl(device_async_resource_ref upstream,
                                 std::size_t allocation_limit,
                                 std::size_t alignment);

  ~limiting_resource_adaptor_impl() = default;

  // Non-copyable and non-movable: a single instance is shared (reference
  // counted) via `cuda::mr::shared_resource`, and equality below is
  // identity-based, which copying would break.
  limiting_resource_adaptor_impl(limiting_resource_adaptor_impl const&) = delete;
  limiting_resource_adaptor_impl(limiting_resource_adaptor_impl&&) = delete;
  limiting_resource_adaptor_impl& operator=(limiting_resource_adaptor_impl const&) = delete;
  limiting_resource_adaptor_impl& operator=(limiting_resource_adaptor_impl&&) = delete;

  /**
   * @brief Equality is object identity: two impls compare equal only when they
   * are the same object (and thus share one limit and allocated-bytes counter).
   *
   * @param other The impl to compare against
   * @return true if `other` is this same object
   */
  bool operator==(limiting_resource_adaptor_impl const& other) const noexcept
  {
    // std::addressof (requires <memory>) is used instead of `&other` to be
    // robust against any overloaded unary operator&.
    return this == std::addressof(other);
  }

  /**
   * @brief Inequality; negation of the identity comparison above.
   *
   * @param other The impl to compare against
   * @return true if `other` is a different object
   */
  bool operator!=(limiting_resource_adaptor_impl const& other) const noexcept
  {
    return !(*this == other);
  }

  /// @briefreturn{device_async_resource_ref to the upstream resource} (defined out-of-line)
  [[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept;

  /// @briefreturn{Bytes currently allocated through this impl} (defined out-of-line)
  [[nodiscard]] std::size_t get_allocated_bytes() const;

  /// @briefreturn{Maximum bytes this impl is allowed to allocate} (defined out-of-line)
  [[nodiscard]] std::size_t get_allocation_limit() const;

  // Stream-ordered allocate/deallocate pair; implemented in
  // limiting_resource_adaptor_impl.cpp. Presumably fails (throws) when the
  // request would exceed the allocation limit — see the .cpp for the contract.
  void* allocate(cuda::stream_ref stream,
                 std::size_t bytes,
                 std::size_t alignment = alignof(std::max_align_t));

  void deallocate(cuda::stream_ref stream,
                  void* ptr,
                  std::size_t bytes,
                  std::size_t alignment = alignof(std::max_align_t)) noexcept;

  // Synchronous (stream-less) counterparts; implemented in the .cpp.
  void* allocate_sync(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t));

  void deallocate_sync(void* ptr,
                       std::size_t bytes,
                       std::size_t alignment = alignof(std::max_align_t)) noexcept;

  /**
   * @brief Enables the `cuda::mr::device_accessible` property, required for
   * this class to satisfy the CCCL `cuda::mr::resource` concept for device
   * memory.
   */
  RMM_CONSTEXPR_FRIEND void get_property(limiting_resource_adaptor_impl const&,
                                         cuda::mr::device_accessible) noexcept
  {
  }

 private:
  // Type-erased owning handle to the upstream resource.
  cuda::mr::any_resource<cuda::mr::device_accessible> upstream_mr_;
  // Maximum bytes this impl is allowed to allocate.
  std::size_t allocation_limit_;
  // Currently-allocated bytes; atomic so accounting is thread-safe.
  std::atomic<std::size_t> allocated_bytes_;
  // Alignment used when rounding each request for accounting purposes.
  std::size_t alignment_;
};

} // namespace detail
} // namespace mr
} // namespace RMM_NAMESPACE
173 changes: 64 additions & 109 deletions cpp/include/rmm/mr/limiting_resource_adaptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
#pragma once

#include <rmm/aligned.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/detail/format.hpp>
#include <rmm/mr/detail/limiting_resource_adaptor_impl.hpp>
#include <rmm/mr/device_memory_resource.hpp>
#include <rmm/mr/per_device_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <atomic>
#include <cuda/memory_resource>

#include <cstddef>
#include <memory>

namespace RMM_NAMESPACE {
namespace mr {
Expand All @@ -24,20 +23,60 @@ namespace mr {
* @file
*/
/**
* @brief Resource that uses `Upstream` to allocate memory and limits the total
* @brief Resource that uses an upstream resource to allocate memory and limits the total
* allocations possible.
*
* An instance of this resource can be constructed with an existing, upstream
* resource in order to satisfy allocation requests, but any existing allocations
* will be untracked. Atomics are used to make this thread-safe, but note that
* the `get_allocated_bytes` may not include in-flight allocations.
*
* @tparam Upstream Type of the upstream resource used for
* allocation/deallocation.
* This class is copyable and shares ownership of its internal state via
* `cuda::mr::shared_resource`.
*/
template <typename Upstream>
class limiting_resource_adaptor final : public device_memory_resource {
class RMM_EXPORT limiting_resource_adaptor
: public device_memory_resource,
private cuda::mr::shared_resource<detail::limiting_resource_adaptor_impl> {
using shared_base = cuda::mr::shared_resource<detail::limiting_resource_adaptor_impl>;

public:
// Begin legacy device_memory_resource compatibility layer
using device_memory_resource::allocate;
using device_memory_resource::allocate_sync;
using device_memory_resource::deallocate;
using device_memory_resource::deallocate_sync;

/**
* @brief Compare two adaptors for equality (shared-impl identity).
*
* @param other The other limiting_resource_adaptor to compare against.
* @return true if both adaptors share the same underlying state.
*/
[[nodiscard]] bool operator==(limiting_resource_adaptor const& other) const noexcept
{
return static_cast<shared_base const&>(*this) == static_cast<shared_base const&>(other);
}

/**
* @brief Compare two adaptors for inequality.
*
* @param other The other limiting_resource_adaptor to compare against.
* @return true if the adaptors do not share the same underlying state.
*/
[[nodiscard]] bool operator!=(limiting_resource_adaptor const& other) const noexcept
{
return !(*this == other);
}
// End legacy device_memory_resource compatibility layer

/**
* @brief Enables the `cuda::mr::device_accessible` property
*/
RMM_CONSTEXPR_FRIEND void get_property(limiting_resource_adaptor const&,
cuda::mr::device_accessible) noexcept
{
}

/**
* @brief Construct a new limiting resource adaptor using `upstream` to satisfy
* allocation requests and limiting the total allocation amount possible.
Expand All @@ -48,13 +87,7 @@ class limiting_resource_adaptor final : public device_memory_resource {
*/
limiting_resource_adaptor(device_async_resource_ref upstream,
std::size_t allocation_limit,
std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT)
: upstream_{upstream},
allocation_limit_{allocation_limit},
allocated_bytes_(0),
alignment_(alignment)
{
}
std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT);

/**
* @brief Construct a new limiting resource adaptor using `upstream` to satisfy
Expand All @@ -66,32 +99,16 @@ class limiting_resource_adaptor final : public device_memory_resource {
* @param allocation_limit Maximum memory allowed for this allocator
* @param alignment Alignment in bytes for the start of each allocated buffer
*/
limiting_resource_adaptor(Upstream* upstream,
limiting_resource_adaptor(device_memory_resource* upstream,
std::size_t allocation_limit,
std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT)
: upstream_{to_device_async_resource_ref_checked(upstream)},
allocation_limit_{allocation_limit},
allocated_bytes_(0),
alignment_(alignment)
{
}
std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT);

limiting_resource_adaptor() = delete;
~limiting_resource_adaptor() override = default;
limiting_resource_adaptor(limiting_resource_adaptor const&) = delete;
limiting_resource_adaptor(limiting_resource_adaptor&&) noexcept =
default; ///< @default_move_constructor
limiting_resource_adaptor& operator=(limiting_resource_adaptor const&) = delete;
limiting_resource_adaptor& operator=(limiting_resource_adaptor&&) noexcept =
default; ///< @default_move_assignment{limiting_resource_adaptor}
~limiting_resource_adaptor() = default;

/**
* @briefreturn{device_async_resource_ref to the upstream resource}
*/
[[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept
{
return upstream_;
}
[[nodiscard]] device_async_resource_ref get_upstream_resource() const noexcept;

/**
* @brief Query the number of bytes that have been allocated. Note that
Expand All @@ -102,7 +119,7 @@ class limiting_resource_adaptor final : public device_memory_resource {
* @return std::size_t number of bytes that have been allocated through this
* allocator.
*/
[[nodiscard]] std::size_t get_allocated_bytes() const { return allocated_bytes_; }
[[nodiscard]] std::size_t get_allocated_bytes() const;

/**
* @brief Query the maximum number of bytes that this allocator is allowed
Expand All @@ -111,83 +128,21 @@ class limiting_resource_adaptor final : public device_memory_resource {
*
* @return std::size_t max number of bytes allowed for this allocator
*/
[[nodiscard]] std::size_t get_allocation_limit() const { return allocation_limit_; }
[[nodiscard]] std::size_t get_allocation_limit() const;

// Begin legacy device_memory_resource compatibility layer
private:
/**
* @brief Allocates memory of size at least `bytes` using the upstream
* resource as long as it fits inside the allocation limit.
*
* The returned pointer has at least 256B alignment.
*
* @throws rmm::bad_alloc if the requested allocation could not be fulfilled
* by the upstream resource.
*
* @param bytes The size, in bytes, of the allocation
* @param stream Stream on which to perform the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
{
auto const proposed_size = align_up(bytes, alignment_);
auto const old = allocated_bytes_.fetch_add(proposed_size);
if (old + proposed_size <= allocation_limit_) {
try {
return get_upstream_resource().allocate(stream, bytes);
} catch (...) {
allocated_bytes_ -= proposed_size;
throw;
}
}

allocated_bytes_ -= proposed_size;
auto const msg = std::string("Exceeded memory limit (failed to allocate ") +
rmm::detail::format_bytes(bytes) + ")";
RMM_FAIL(msg.c_str(), rmm::out_of_memory);
}

/**
* @brief Free allocation of size `bytes` pointed to by `ptr`
*
* @param ptr Pointer to be deallocated
* @param bytes Size of the allocation
* @param stream Stream on which to perform the deallocation
*/
void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override
{
std::size_t allocated_size = align_up(bytes, alignment_);
get_upstream_resource().deallocate(stream, ptr, bytes);
allocated_bytes_ -= allocated_size;
}

/**
* @brief Compare the upstream resource to another.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
if (this == std::addressof(other)) { return true; }
auto const* cast = dynamic_cast<limiting_resource_adaptor<Upstream> const*>(&other);
if (cast == nullptr) { return false; }
return get_upstream_resource() == cast->get_upstream_resource();
}
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override;

// The upstream resource used for satisfying allocation requests
device_async_resource_ref upstream_;
void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override;

// maximum bytes this allocator is allowed to allocate.
std::size_t allocation_limit_;

// number of currently-allocated bytes
std::atomic<std::size_t> allocated_bytes_;

// todo: should be some way to ask the upstream...
std::size_t alignment_;
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override;
// End legacy device_memory_resource compatibility layer
};

static_assert(cuda::mr::resource_with<limiting_resource_adaptor, cuda::mr::device_accessible>,
"limiting_resource_adaptor does not satisfy the cuda::mr::resource concept");

/** @} */ // end of group
} // namespace mr
} // namespace RMM_NAMESPACE
Loading
Loading