Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/include/cudf/detail/utilities/algorithm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <cudf/detail/utilities/cuda_memcpy.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down
11 changes: 4 additions & 7 deletions cpp/include/cudf/detail/utilities/host_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,6 @@ class rmm_host_allocator<void> {
};
};

template <class DesiredProperty, class... Properties>
inline constexpr bool contains_property =
(cuda::std::is_same_v<DesiredProperty, Properties> || ... || false);

/*! \p rmm_host_allocator is a CUDA-specific host memory allocator
* that employs \c `cudf::host_async_resource_ref` for allocation.
*
Expand Down Expand Up @@ -102,11 +98,12 @@ class rmm_host_allocator {
#ifdef __CUDACC__
#pragma nv_exec_check_disable
#endif
template <class... Properties>
rmm_host_allocator(async_host_resource_ref<Properties...> _mr, rmm::cuda_stream_view _stream)
template <typename ResourceType>
rmm_host_allocator(ResourceType _mr, rmm::cuda_stream_view _stream)
: mr(_mr),
stream(_stream),
_is_device_accessible{contains_property<cuda::mr::device_accessible, Properties...>}
_is_device_accessible{
cuda::mr::synchronous_resource_with<ResourceType, cuda::mr::device_accessible>}
{
}

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/io/text/multibyte_split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,10 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
*thrust::find_if(rmm::exec_policy_nosync(scan_stream),
it,
it + new_offsets_unclamped,
[row_offsets, byte_range_end] __device__(output_offset i) {
return row_offsets[i] >= byte_range_end;
});
cuda::proclaim_return_type<bool>(
[row_offsets, byte_range_end] __device__(output_offset i) {
return row_offsets[i] >= byte_range_end;
}));
// if we had no out-of-bounds offset, we copy all offsets
if (end_loc == new_offsets_unclamped) { return end_loc; }
// otherwise we copy only up to (including) the first out-of-bounds delimiter
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/join/sort_merge_join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <cuda/functional>
#include <cuda/std/iterator>
#include <cuda/std/tuple>
#include <thrust/binary_search.h>
Expand Down Expand Up @@ -124,7 +125,8 @@ merge<LargerIterator, SmallerIterator>::matches_per_row(rmm::cuda_stream_view st
auto match_counts_it = match_counts.begin();
auto smaller_it = thrust::transform_iterator(
sorted_smaller_order_begin,
[] __device__(size_type idx) { return static_cast<detail::row::lhs_index_type>(idx); });
cuda::proclaim_return_type<detail::row::lhs_index_type>(
[] __device__(size_type idx) { return static_cast<detail::row::lhs_index_type>(idx); }));
Comment on lines +128 to +129
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that proclaim_return_type was no longer needed if the lambda's return type is obvious (like this one)?

Copy link
Copy Markdown
Contributor Author

@bdice bdice Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is needed to compile in some cases where deduction is not possible for the compiler. These were needed to fix compiler errors.

Copy link
Copy Markdown
Contributor

@ttnghia ttnghia Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised that such an issue still happens with this "trivial" code, since the output type looks very obvious to deduce:

return static_cast<detail::row::lhs_index_type>(....);

So I consider this a regression in CCCL 3.2.

thrust::upper_bound(rmm::exec_policy_nosync(stream),
smaller_it,
smaller_it + smaller_numrows,
Expand Down Expand Up @@ -212,10 +214,12 @@ merge<LargerIterator, SmallerIterator>::operator()(rmm::cuda_stream_view stream,
});
auto smaller_it = thrust::transform_iterator(
sorted_smaller_order_begin,
[] __device__(size_type idx) { return static_cast<detail::row::lhs_index_type>(idx); });
cuda::proclaim_return_type<detail::row::lhs_index_type>(
[] __device__(size_type idx) { return static_cast<detail::row::lhs_index_type>(idx); }));
auto larger_it = thrust::transform_iterator(
nonzero_matches.begin(),
[] __device__(size_type idx) { return static_cast<detail::row::rhs_index_type>(idx); });
cuda::proclaim_return_type<detail::row::rhs_index_type>(
[] __device__(size_type idx) { return static_cast<detail::row::rhs_index_type>(idx); }));
thrust::lower_bound(rmm::exec_policy_nosync(stream),
smaller_it,
smaller_it + smaller_numrows,
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/quantiles/tdigest/tdigest_aggregation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -738,8 +738,9 @@ cluster_info generate_group_cluster_info(int delta,
// CPU. This specifically addresses customer use cases with large inputs and small numbers of
// groups, such as just 1. if we're going to be using the CPU, use pinned for a few of the temp
// buffers
auto temp_mr =
use_cpu ? cudf::get_pinned_memory_resource() : cudf::get_current_device_resource_ref();
rmm::device_async_resource_ref temp_mr =
use_cpu ? rmm::device_async_resource_ref{cudf::get_pinned_memory_resource()}
: cudf::get_current_device_resource_ref();

// output from the function
cluster_info cinfo;
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/pandas/scripts/conftest-patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -10454,6 +10454,7 @@ def set_copy_on_write_option():
"tests/indexing/test_iloc.py::TestILocErrors::test_iloc_float_raises[object-Series]",
"tests/indexing/test_iloc.py::TestILocErrors::test_iloc_float_raises[uint64-Series]",
"tests/indexing/test_iloc.py::TestiLocBaseIndependent::test_setitem_mix_of_nan_and_interval[NaTType-1.0]",
"tests/indexing/test_iloc.py::TestiLocBaseIndependent::test_setitem_mix_of_nan_and_interval[NaTType-a]",
"tests/io/excel/test_openpyxl.py::test_engine_kwargs_append_data_only[True-0]",
"tests/io/excel/test_writers.py::TestExcelWriterEngineTests::test_ExcelWriter_dispatch[OpenpyxlWriter-.xlsx]",
"tests/io/formats/test_format.py::TestDataFrameFormatting::test_auto_detect",
Expand Down