-
Notifications
You must be signed in to change notification settings - Fork 247
Migrate base memory resources to native CCCL resource concept #2289
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2aaf53c
8a5d30f
5e4d5de
b45ab23
857d291
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,20 +4,16 @@ | |
| */ | ||
| #pragma once | ||
|
|
||
| #include <rmm/cuda_device.hpp> | ||
| #include <rmm/aligned.hpp> | ||
| #include <rmm/cuda_stream_view.hpp> | ||
| #include <rmm/detail/error.hpp> | ||
| #include <rmm/detail/export.hpp> | ||
| #include <rmm/detail/runtime_capabilities.hpp> | ||
| #include <rmm/mr/cuda_async_view_memory_resource.hpp> | ||
| #include <rmm/mr/detail/cuda_async_managed_memory_resource_impl.hpp> | ||
| #include <rmm/mr/device_memory_resource.hpp> | ||
|
|
||
| #include <cuda/std/type_traits> | ||
| #include <cuda/memory_resource> | ||
| #include <cuda_runtime_api.h> | ||
|
|
||
| #include <cstddef> | ||
| #include <cstdint> | ||
| #include <optional> | ||
|
|
||
| namespace RMM_NAMESPACE { | ||
| namespace mr { | ||
|
|
@@ -32,98 +28,103 @@ namespace mr { | |
| * `cudaMallocFromPoolAsync`/`cudaFreeFromPoolAsync` with a managed memory pool | ||
| * for allocation/deallocation. | ||
| */ | ||
| class cuda_async_managed_memory_resource final : public device_memory_resource { | ||
| class RMM_EXPORT cuda_async_managed_memory_resource final | ||
| : public device_memory_resource, | ||
| private cuda::mr::shared_resource<detail::cuda_async_managed_memory_resource_impl> { | ||
| using shared_base = cuda::mr::shared_resource<detail::cuda_async_managed_memory_resource_impl>; | ||
|
|
||
| public: | ||
| // Begin legacy device_memory_resource compatibility layer | ||
| using device_memory_resource::allocate; | ||
| using device_memory_resource::allocate_sync; | ||
| using device_memory_resource::deallocate; | ||
| using device_memory_resource::deallocate_sync; | ||
|
|
||
| /** | ||
| * @brief Constructs a cuda_async_managed_memory_resource with the default managed memory pool for | ||
| * the current device. | ||
| * | ||
| * The default managed memory pool is the pool that is created when the device is created. | ||
| * Pool properties such as the release threshold are not modified. | ||
| * @brief Compare two resources for equality (shared-impl identity). | ||
| * | ||
| * @throws rmm::logic_error if the CUDA version does not support `cudaMallocFromPoolAsync` with | ||
| * managed memory pool | ||
| * @param other The other cuda_async_managed_memory_resource to compare against. | ||
| * @return true if both resources share the same underlying pool. | ||
| */ | ||
| cuda_async_managed_memory_resource() | ||
| [[nodiscard]] bool operator==(cuda_async_managed_memory_resource const& other) const noexcept | ||
| { | ||
| // Check if managed memory pools are supported | ||
| RMM_EXPECTS(rmm::detail::runtime_async_managed_alloc::is_supported(), | ||
| "cuda_async_managed_memory_resource requires CUDA 13.0 or higher"); | ||
|
|
||
| #if defined(CUDA_VERSION) && CUDA_VERSION >= RMM_MIN_ASYNC_MANAGED_ALLOC_CUDA_VERSION | ||
| cudaMemPool_t managed_pool_handle{}; | ||
| cudaMemLocation location{.type = cudaMemLocationTypeDevice, | ||
| .id = rmm::get_current_cuda_device().value()}; | ||
| RMM_CUDA_TRY( | ||
| cudaMemGetDefaultMemPool(&managed_pool_handle, &location, cudaMemAllocationTypeManaged)); | ||
| pool_ = cuda_async_view_memory_resource{managed_pool_handle}; | ||
| #endif | ||
| return static_cast<shared_base const&>(*this) == static_cast<shared_base const&>(other); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Returns the underlying native handle to the CUDA pool | ||
| * @brief Compare two resources for inequality. | ||
| * | ||
| * @return cudaMemPool_t Handle to the underlying CUDA pool | ||
| * @param other The other cuda_async_managed_memory_resource to compare against. | ||
| * @return true if the resources do not share the same underlying pool. | ||
| */ | ||
| [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); } | ||
|
|
||
| ~cuda_async_managed_memory_resource() override {} | ||
| cuda_async_managed_memory_resource(cuda_async_managed_memory_resource const&) = delete; | ||
| cuda_async_managed_memory_resource(cuda_async_managed_memory_resource&&) = delete; | ||
| cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource const&) = delete; | ||
| cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource&&) = delete; | ||
|
|
||
| private: | ||
| cuda_async_view_memory_resource pool_{}; | ||
| [[nodiscard]] bool operator!=(cuda_async_managed_memory_resource const& other) const noexcept | ||
| { | ||
| return !(*this == other); | ||
| } | ||
| // End legacy device_memory_resource compatibility layer | ||
|
|
||
| /** | ||
| * @brief Allocates memory of size at least \p bytes. | ||
| * | ||
| * The returned pointer will have at minimum 256 byte alignment. | ||
| * | ||
| * @param bytes The size of the allocation | ||
| * @param stream Stream on which to perform allocation | ||
| * @return void* Pointer to the newly allocated memory | ||
| * @brief Enables the `cuda::mr::device_accessible` property | ||
| */ | ||
| void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override | ||
| RMM_CONSTEXPR_FRIEND void get_property(cuda_async_managed_memory_resource const&, | ||
| cuda::mr::device_accessible) noexcept | ||
| { | ||
| return pool_.allocate(stream, bytes); | ||
| } | ||
|
|
||
|
Comment on lines 66 to 73
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it possible to get these properties from the impl via
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| /** | ||
| * @brief Deallocate memory pointed to by \p ptr. | ||
| * | ||
| * @param ptr Pointer to be deallocated | ||
| * @param bytes The size in bytes of the allocation. This must be equal to the | ||
| * value of `bytes` that was passed to the `allocate` call that returned `ptr`. | ||
| * @param stream Stream on which to perform deallocation | ||
| * @brief Enables the `cuda::mr::host_accessible` property | ||
| */ | ||
| void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) noexcept override | ||
| RMM_CONSTEXPR_FRIEND void get_property(cuda_async_managed_memory_resource const&, | ||
| cuda::mr::host_accessible) noexcept | ||
| { | ||
| pool_.deallocate(stream, ptr, bytes); | ||
| } | ||
|
|
||
| /** | ||
| * @brief Compare this resource to another. | ||
| * @brief Constructs a cuda_async_managed_memory_resource with the default managed memory pool for | ||
| * the current device. | ||
| * | ||
| * @param other The other resource to compare to | ||
| * @return true If the two resources are equivalent | ||
| * @return false If the two resources are not equal | ||
| * The default managed memory pool is the pool that is created when the device is created. | ||
| * Pool properties such as the release threshold are not modified. | ||
| * | ||
| * @throws rmm::logic_error if the CUDA version does not support `cudaMallocFromPoolAsync` with | ||
| * managed memory pool | ||
| */ | ||
| [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override | ||
| { | ||
| auto const* async_mr = dynamic_cast<cuda_async_managed_memory_resource const*>(&other); | ||
| return (async_mr != nullptr) && (this->pool_handle() == async_mr->pool_handle()); | ||
| } | ||
| cuda_async_managed_memory_resource(); | ||
|
|
||
| /** | ||
| * @brief Returns the underlying native handle to the CUDA pool | ||
| * | ||
| * @return cudaMemPool_t Handle to the underlying CUDA pool | ||
| */ | ||
| [[nodiscard]] cudaMemPool_t pool_handle() const noexcept; | ||
|
|
||
| ~cuda_async_managed_memory_resource() = default; | ||
| cuda_async_managed_memory_resource(cuda_async_managed_memory_resource const&) = delete; | ||
| cuda_async_managed_memory_resource(cuda_async_managed_memory_resource&&) = delete; | ||
| cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource const&) = delete; | ||
| cuda_async_managed_memory_resource& operator=(cuda_async_managed_memory_resource&&) = delete; | ||
|
|
||
| // Begin legacy device_memory_resource compatibility layer | ||
| private: | ||
| void* do_allocate(std::size_t bytes, cuda_stream_view stream) override; | ||
|
|
||
| void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) noexcept override; | ||
|
|
||
| [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override; | ||
| // End legacy device_memory_resource compatibility layer | ||
| }; | ||
|
|
||
| // static property checks | ||
| static_assert(cuda::mr::synchronous_resource<cuda_async_managed_memory_resource>); | ||
| static_assert(cuda::mr::resource<cuda_async_managed_memory_resource>); | ||
| static_assert(cuda::mr::synchronous_resource_with<cuda_async_managed_memory_resource, | ||
| cuda::mr::device_accessible>); | ||
| static_assert(cuda::mr::synchronous_resource_with<cuda_async_managed_memory_resource, | ||
| cuda::mr::host_accessible>); | ||
| static_assert( | ||
| cuda::mr::resource_with<cuda_async_managed_memory_resource, cuda::mr::device_accessible>); | ||
| static_assert( | ||
| cuda::mr::resource_with<cuda_async_managed_memory_resource, cuda::mr::host_accessible>); | ||
|
|
||
| /** @} */ // end of group | ||
| } // namespace mr | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is going away soon, but remind me, why do we get the concrete implementations from
device_memory_resource rather than shared_base? See previous comments here: #2246 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is being fixed in a later phase. I have a draft of it locally. I think it's fine to delay this until that later PR is ready.