Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,29 +93,33 @@ add_library(
src/mr/aligned_resource_adaptor.cpp
src/mr/arena_memory_resource.cpp
src/mr/binning_memory_resource.cpp
src/mr/callback_memory_resource.cpp
src/mr/cuda_async_managed_memory_resource.cpp
src/mr/cuda_async_memory_resource.cpp
src/mr/detail/aligned_resource_adaptor_impl.cpp
src/mr/detail/arena_memory_resource_impl.cpp
src/mr/detail/binning_memory_resource_impl.cpp
src/mr/detail/callback_memory_resource_impl.cpp
src/mr/callback_memory_resource.cpp
src/mr/detail/cuda_async_managed_memory_resource_impl.cpp
src/mr/detail/cuda_async_memory_resource_impl.cpp
src/mr/detail/fixed_size_memory_resource_impl.cpp
src/mr/detail/logging_resource_adaptor_impl.cpp
src/mr/detail/logging_resource_adaptor_impl.cpp
src/mr/detail/limiting_resource_adaptor_impl.cpp
src/mr/detail/pool_memory_resource_impl.cpp
src/mr/detail/logging_resource_adaptor_impl.cpp
src/mr/detail/pool_memory_resource_impl.cpp
src/mr/detail/prefetch_resource_adaptor_impl.cpp
src/mr/detail/sam_headroom_memory_resource_impl.cpp
src/mr/detail/statistics_resource_adaptor_impl.cpp
src/mr/detail/tracking_resource_adaptor_impl.cpp
src/mr/detail/thread_safe_resource_adaptor_impl.cpp
src/mr/detail/tracking_resource_adaptor_impl.cpp
src/mr/fixed_size_memory_resource.cpp
src/mr/limiting_resource_adaptor.cpp
src/mr/logging_resource_adaptor.cpp
src/mr/pool_memory_resource.cpp
src/mr/prefetch_resource_adaptor.cpp
src/mr/sam_headroom_memory_resource.cpp
src/mr/statistics_resource_adaptor.cpp
src/mr/tracking_resource_adaptor.cpp
src/mr/thread_safe_resource_adaptor.cpp
src/mr/tracking_resource_adaptor.cpp
src/prefetch.cpp)
add_library(rmm::rmm ALIAS rmm)

Expand Down
133 changes: 67 additions & 66 deletions cpp/include/rmm/mr/cuda_async_managed_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,16 @@
*/
#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/aligned.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/detail/runtime_capabilities.hpp>
#include <rmm/mr/cuda_async_view_memory_resource.hpp>
#include <rmm/mr/detail/cuda_async_managed_memory_resource_impl.hpp>
#include <rmm/mr/device_memory_resource.hpp>

#include <cuda/std/type_traits>
#include <cuda/memory_resource>
#include <cuda_runtime_api.h>

#include <cstddef>
#include <cstdint>
#include <optional>

namespace RMM_NAMESPACE {
namespace mr {
Expand All @@ -32,98 +28,103 @@ namespace mr {
* `cudaMallocFromPoolAsync`/`cudaFreeFromPoolAsync` with a managed memory pool
* for allocation/deallocation.
*/
class cuda_async_managed_memory_resource final : public device_memory_resource {
class RMM_EXPORT cuda_async_managed_memory_resource final
: public device_memory_resource,
private cuda::mr::shared_resource<detail::cuda_async_managed_memory_resource_impl> {
using shared_base = cuda::mr::shared_resource<detail::cuda_async_managed_memory_resource_impl>;

public:
// Begin legacy device_memory_resource compatibility layer
using device_memory_resource::allocate;
using device_memory_resource::allocate_sync;
using device_memory_resource::deallocate;
using device_memory_resource::deallocate_sync;
Comment on lines +37 to +41
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going away soon, but remind me: why do we pull in the concrete implementations from `device_memory_resource` rather than from `shared_base`?

See previous comments here: #2246 (comment)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is being fixed in a later phase. I have a draft of it locally. I think it's fine to delay this until that later PR is ready.


/**
* @brief Constructs a cuda_async_managed_memory_resource with the default managed memory pool for
* the current device.
*
* The default managed memory pool is the pool that is created when the device is created.
* Pool properties such as the release threshold are not modified.
* @brief Compare two resources for equality (shared-impl identity).
*
* @throws rmm::logic_error if the CUDA version does not support `cudaMallocFromPoolAsync` with
* managed memory pool
* @param other The other cuda_async_managed_memory_resource to compare against.
* @return true if both resources share the same underlying pool.
*/
cuda_async_managed_memory_resource()
[[nodiscard]] bool operator==(cuda_async_managed_memory_resource const& other) const noexcept
{
// Check if managed memory pools are supported
RMM_EXPECTS(rmm::detail::runtime_async_managed_alloc::is_supported(),
"cuda_async_managed_memory_resource requires CUDA 13.0 or higher");

#if defined(CUDA_VERSION) && CUDA_VERSION >= RMM_MIN_ASYNC_MANAGED_ALLOC_CUDA_VERSION
cudaMemPool_t managed_pool_handle{};
cudaMemLocation location{.type = cudaMemLocationTypeDevice,
.id = rmm::get_current_cuda_device().value()};
RMM_CUDA_TRY(
cudaMemGetDefaultMemPool(&managed_pool_handle, &location, cudaMemAllocationTypeManaged));
pool_ = cuda_async_view_memory_resource{managed_pool_handle};
#endif
return static_cast<shared_base const&>(*this) == static_cast<shared_base const&>(other);
}

/**
* @brief Returns the underlying native handle to the CUDA pool
* @brief Compare two resources for inequality.
*
* @return cudaMemPool_t Handle to the underlying CUDA pool
* @param other The other cuda_async_managed_memory_resource to compare against.
* @return true if the resources do not share the same underlying pool.
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); }

~cuda_async_managed_memory_resource() override {}
cuda_async_managed_memory_resource(cuda_async_managed_memory_resource const&) = delete;
cuda_async_managed_memory_resource(cuda_async_managed_memory_resource&&) = delete;
cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource const&) = delete;
cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource&&) = delete;

private:
cuda_async_view_memory_resource pool_{};
[[nodiscard]] bool operator!=(cuda_async_managed_memory_resource const& other) const noexcept
{
return !(*this == other);
}
// End legacy device_memory_resource compatibility layer

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* @param bytes The size of the allocation
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
* @brief Enables the `cuda::mr::device_accessible` property
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
RMM_CONSTEXPR_FRIEND void get_property(cuda_async_managed_memory_resource const&,
cuda::mr::device_accessible) noexcept
{
return pool_.allocate(stream, bytes);
}

Comment on lines 66 to 73
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to get these properties from the impl with a using-declaration, i.e. `using shared_base::get_property;`?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

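`get_property` is a friend free function that is found through argument-dependent lookup (ADL); it is not a member of `shared_base`, so a `using` declaration cannot pull it in. All classes built on `shared_resource` duplicate it for that reason.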

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `ptr`.
* @param stream Stream on which to perform deallocation
* @brief Enables the `cuda::mr::host_accessible` property
*/
void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override
RMM_CONSTEXPR_FRIEND void get_property(cuda_async_managed_memory_resource const&,
cuda::mr::host_accessible) noexcept
{
pool_.deallocate(stream, ptr, bytes);
}

/**
* @brief Compare this resource to another.
* @brief Constructs a cuda_async_managed_memory_resource with the default managed memory pool for
* the current device.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
* The default managed memory pool is the pool that is created when the device is created.
* Pool properties such as the release threshold are not modified.
*
* @throws rmm::logic_error if the CUDA version does not support `cudaMallocFromPoolAsync` with
* managed memory pool
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
auto const* async_mr = dynamic_cast<cuda_async_managed_memory_resource const*>(&other);
return (async_mr != nullptr) && (this->pool_handle() == async_mr->pool_handle());
}
cuda_async_managed_memory_resource();

/**
* @brief Returns the underlying native handle to the CUDA pool
*
* @return cudaMemPool_t Handle to the underlying CUDA pool
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept;

~cuda_async_managed_memory_resource() = default;
cuda_async_managed_memory_resource(cuda_async_managed_memory_resource const&) = delete;
cuda_async_managed_memory_resource(cuda_async_managed_memory_resource&&) = delete;
cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource const&) = delete;
cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource&&) = delete;

// Begin legacy device_memory_resource compatibility layer
private:
void* do_allocate(std::size_t bytes, cuda_stream_view stream) override;

void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override;

[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override;
// End legacy device_memory_resource compatibility layer
};

// static property checks
static_assert(cuda::mr::synchronous_resource<cuda_async_managed_memory_resource>);
static_assert(cuda::mr::resource<cuda_async_managed_memory_resource>);
static_assert(cuda::mr::synchronous_resource_with<cuda_async_managed_memory_resource,
cuda::mr::device_accessible>);
static_assert(cuda::mr::synchronous_resource_with<cuda_async_managed_memory_resource,
cuda::mr::host_accessible>);
static_assert(
cuda::mr::resource_with<cuda_async_managed_memory_resource, cuda::mr::device_accessible>);
static_assert(
cuda::mr::resource_with<cuda_async_managed_memory_resource, cuda::mr::host_accessible>);

/** @} */ // end of group
} // namespace mr
Expand Down
Loading
Loading