rapidsai · rapids-bot · Jan 6, 2026 · Dec 10, 2025 · Dec 11, 2025 · Dec 12, 2025
@@ -1,27 +1,23 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
 #include "join/conditional_join.hpp"
 #include "join/conditional_join_kernels.cuh"
-#include "join/join_common_utils.cuh"
 #include "join/join_common_utils.hpp"
 
 #include <cudf/ast/detail/expression_parser.hpp>
 #include <cudf/ast/expressions.hpp>
 #include <cudf/detail/device_scalar.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/utilities/grid_1d.cuh>
 #include <cudf/join/conditional_join.hpp>
 #include <cudf/join/join.hpp>
-#include <cudf/table/table.hpp>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
-#include <cudf/utilities/memory_resource.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -19,6 +19,8 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <memory>
+
 namespace cudf {
 namespace detail {
 /**

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #include "join_common_utils.cuh"
@@ -20,6 +20,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/polymorphic_allocator.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <cooperative_groups.h>
 #include <cub/block/block_scan.cuh>

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -16,6 +16,7 @@
 #include <cudf/join/filtered_join.hpp>
 #include <cudf/join/join.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -32,6 +33,8 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/sequence.h>
 
+#include <memory>
+
 namespace cudf {
 namespace detail {
 namespace {

@@ -1,10 +1,12 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #include "join_common_utils.cuh"
+#include "join_common_utils.hpp"
 
 #include <cudf/copying.hpp>
+#include <cudf/detail/cuco_helpers.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/join/hash_join.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
@@ -42,8 +44,35 @@ namespace detail {
 namespace {
 using hash_table_t = cudf::hash_join::impl_type::hash_table_t;
 
-// Multimap type used for mixed joins. TODO: This is a temporary alias used
-// TODO: `pair_equal` to be moved to common utils during mixed-join migration
+/**
+ * @brief Checks if a join operation is trivial (empty tables or certain join types with empty
+ * data).
+ */
+bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type)
+{
+  // If there is nothing to join, then send empty table with all columns
+  if (left.is_empty() || right.is_empty()) { return true; }
+
+  // If left join and the left table is empty, return immediately
+  if ((join_kind::LEFT_JOIN == join_type) && (0 == left.num_rows())) { return true; }
+
+  // If Inner Join and either table is empty, return immediately
+  if ((join_kind::INNER_JOIN == join_type) && ((0 == left.num_rows()) || (0 == right.num_rows()))) {
+    return true;
+  }
+
+  // If left semi join (contains) and right table is empty,
+  // return immediately
+  if ((join_kind::LEFT_SEMI_JOIN == join_type) && (0 == right.num_rows())) { return true; }
+
+  // If left semi- or anti- join, and the left table is empty, return immediately
+  if ((join_kind::LEFT_SEMI_JOIN == join_type || join_kind::LEFT_ANTI_JOIN == join_type) &&
+      (0 == left.num_rows())) {
+    return true;
+  }
+
+  return false;
+}
 
 template <typename Equal>
 class pair_equal {

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #include "join_common_utils.hpp"
@@ -12,9 +12,14 @@
 #include <cudf/join/join.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
 #include <cudf/utilities/memory_resource.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/resource_ref.hpp>
+
+#include <memory>
 
 namespace cudf {
 namespace detail {

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 #pragma once
@@ -11,16 +11,12 @@
 #include <cudf/detail/row_operator/equality.cuh>
 #include <cudf/detail/row_operator/hashing.cuh>
 #include <cudf/detail/utilities/cuda.cuh>
-#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
-#include <cudf/utilities/memory_resource.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_uvector.hpp>
 
 #include <thrust/iterator/counting_iterator.h>
 
 #include <memory>
-#include <utility>
 
 namespace cudf::detail {
 template <typename Hasher>
@@ -52,169 +48,6 @@ class row_is_valid {
   bitmask_type const* _row_bitmask;
 };
 
-/**
- * @brief Device functor to determine if two pairs are identical.
- *
- * This equality comparator is designed for use with cuco::static_multimap's
- * pair* APIs, which will compare equality based on comparing (key, value)
- * pairs. In the context of joins, these pairs are of the form
- * (row_hash, row_id). A hash probe hit indicates that hash of a probe row's hash is
- * equal to the hash of the hash of some row in the multimap, at which point we need an
- * equality comparator that will check whether the contents of the rows are
- * identical. This comparator does so by verifying key equality (i.e. that
- * probe_row_hash == build_row_hash) and then using a row_equality_comparator
- * to compare the contents of the row indices that are stored as the payload in
- * the hash map.
- *
- * @tparam Comparator The row comparator type to perform row equality comparison from row indices.
- */
-template <typename DeviceComparator>
-class pair_equality {
- public:
-  pair_equality(DeviceComparator check_row_equality)
-    : _check_row_equality{std::move(check_row_equality)}
-  {
-  }
-
-  // The parameters are build/probe rather than left/right because the operator
-  // is called by cuco's kernels with parameters in this order (note that this
-  // is an implementation detail that we should eventually stop relying on by
-  // defining operators with suitable heterogeneous typing). Rather than
-  // converting to left/right semantics, we can operate directly on build/probe
-  template <typename LhsPair, typename RhsPair>
-  __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept
-  {
-    using detail::row::lhs_index_type;
-    using detail::row::rhs_index_type;
-
-    return lhs.first == rhs.first and
-           _check_row_equality(lhs_index_type{rhs.second}, rhs_index_type{lhs.second});
-  }
-
- private:
-  DeviceComparator _check_row_equality;
-};
-
-/**
- * @brief Computes the trivial left join operation for the case when the
- * right table is empty.
- *
- * In this case all the valid indices of the left table
- * are returned with their corresponding right indices being set to
- * `JoinNoMatch`, i.e. `cuda::std::numeric_limits<size_type>::min()`.
- *
- * @param left Table of left columns to join
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the result
- *
- * @return Join output indices vector pair
- */
-std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
-          std::unique_ptr<rmm::device_uvector<size_type>>>
-get_trivial_left_join_indices(table_view const& left,
-                              rmm::cuda_stream_view stream,
-                              rmm::device_async_resource_ref mr);
-
-/**
- * @brief Builds the hash table based on the given `build_table`.
- *
- * @tparam MultimapType The type of the hash table
- *
- * @param build Table of columns used to build join hash.
- * @param preprocessed_build shared_ptr to cudf::detail::row::equality::preprocessed_table
- * for build
- * @param hash_table Build hash table.
- * @param has_nested_nulls Flag to denote if build or probe tables have nested nulls
- * @param nulls_equal Flag to denote nulls are equal or not.
- * @param bitmask Bitmask to denote whether a row is valid.
- * @param stream CUDA stream used for device memory operations and kernel launches.
- */
-template <typename HashTable>
-void build_join_hash_table(
-  cudf::table_view const& build,
-  std::shared_ptr<detail::row::equality::preprocessed_table> const& preprocessed_build,
-  HashTable& hash_table,
-  bool has_nested_nulls,
-  null_equality nulls_equal,
-  [[maybe_unused]] bitmask_type const* bitmask,
-  rmm::cuda_stream_view stream)
-{
-  CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty", std::invalid_argument);
-  CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows", std::invalid_argument);
-
-  auto insert_rows = [&](auto const& build, auto const& d_hasher) {
-    auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher});
-
-    if (nulls_equal == cudf::null_equality::EQUAL or not nullable(build)) {
-      hash_table.insert_async(iter, iter + build.num_rows(), stream.value());
-    } else {
-      auto const stencil = thrust::counting_iterator<size_type>{0};
-      auto const pred    = row_is_valid{bitmask};
-
-      // insert valid rows
-      hash_table.insert_if_async(iter, iter + build.num_rows(), stencil, pred, stream.value());
-    }
-  };
-
-  auto const nulls = nullate::DYNAMIC{has_nested_nulls};
-
-  auto const row_hash = detail::row::hash::row_hasher{preprocessed_build};
-  auto const d_hasher = row_hash.device_hasher(nulls);
-
-  insert_rows(build, d_hasher);
-}
-
-// Convenient alias for a pair of unique pointers to device uvectors.
-using VectorPair = std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
-                             std::unique_ptr<rmm::device_uvector<size_type>>>;
-
-/**
- * @brief Takes two pairs of vectors and returns a single pair where the first
- * element is a vector made from concatenating the first elements of both input
- * pairs and the second element is a vector made from concatenating the second
- * elements of both input pairs.
- *
- * This function's primary use is for computing the indices of a full join by
- * first performing a left join, then separately getting the complementary
- * right join indices, then finally calling this function to concatenate the
- * results. In this case, each input VectorPair contains the left and right
- * indices from a join.
- *
- * Note that this is a destructive operation, in that at least one of a or b
- * will be invalidated (by a move) by this operation. Calling code should
- * assume that neither input VectorPair is valid after this function executes.
- *
- * @param a The first pair of vectors.
- * @param b The second pair of vectors.
- * @param stream CUDA stream used for device memory operations and kernel launches
- *
- * @return A pair of vectors containing the concatenated output.
- */
-VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream);
-
-/**
- * @brief  Creates a table containing the complement of left join indices.
- *
- * This table has two columns. The first one is filled with `JoinNoMatch`
- * and the second one contains values from 0 to right_table_row_count - 1
- * excluding those found in the right_indices column.
- *
- * @param right_indices Vector of indices
- * @param left_table_row_count Number of rows of left table
- * @param right_table_row_count Number of rows of right table
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned vectors.
- *
- * @return Pair of vectors containing the left join indices complement
- */
-std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
-          std::unique_ptr<rmm::device_uvector<size_type>>>
-get_left_join_indices_complement(std::unique_ptr<rmm::device_uvector<size_type>>& right_indices,
-                                 size_type left_table_row_count,
-                                 size_type right_table_row_count,
-                                 rmm::cuda_stream_view stream,
-                                 rmm::device_async_resource_ref mr);
-
 /**
  * @brief Device functor to determine if an index is contained in a range.
  */