primitive_row_operators.cuh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
26 #include <cudf/detail/utilities/assert.cuh>
27 #include <cudf/hashing/detail/hash_functions.cuh>
28 #include <cudf/hashing/detail/hashing.hpp>
34 
35 #include <cuda/std/limits>
36 #include <cuda/std/type_traits>
37 #include <thrust/equal.h>
38 
39 #include <memory>
40 
41 namespace CUDF_EXPORT cudf {
42 
56 
57 namespace row::primitive {
58 
/**
 * @brief Maps `T` to itself when `cudf::is_numeric<T>()` holds and to `void` otherwise.
 *
 * @tparam T The candidate type
 */
template <typename T>
using primitive_type_t = typename cuda::std::conditional<cudf::is_numeric<T>(), T, void>::type;
67 
74 template <cudf::type_id Id>
77 };
78 
86  public:
/**
 * @brief Tests one element of `lhs` against one element of `rhs` for equality.
 *
 * Only participates in overload resolution when `Element` is equality comparable with itself.
 *
 * @tparam Element The element type read from both columns
 * @param lhs Column holding the left-hand element
 * @param rhs Column holding the right-hand element
 * @param lhs_element_index Index of the element to read from `lhs`
 * @param rhs_element_index Index of the element to read from `rhs`
 * @return The result of `cudf::equality_compare` on the two elements
 */
template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
__device__ bool operator()(column_device_view const& lhs,
                           column_device_view const& rhs,
                           size_type lhs_element_index,
                           size_type rhs_element_index) const
{
  Element const left_value  = lhs.element<Element>(lhs_element_index);
  Element const right_value = rhs.element<Element>(rhs_element_index);
  return cudf::equality_compare(left_value, right_value);
}
105 
106  // @cond
// Fallback overload chosen when `Element` is not equality comparable with itself.
// All arguments are ignored; reaching this body is reported through CUDF_UNREACHABLE.
template <typename Element, CUDF_ENABLE_IF(!cudf::is_equality_comparable<Element, Element>())>
__device__ bool operator()(column_device_view const&,
                           column_device_view const&,
                           size_type,
                           size_type) const
{
  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
}
115  // @endcond
116 };
117 
125  public:
136  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> lhs,
137  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> rhs,
138  null_equality nulls_are_equal)
139  : _has_nulls{has_nulls}, _lhs{*lhs}, _rhs{*rhs}, _nulls_are_equal{nulls_are_equal}
140  {
141  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
142  }
143 
/**
 * @brief Checks whether a row of the left table equals a row of the right table.
 *
 * Corresponding columns of the two tables are compared pairwise with `thrust::equal`.
 * When null checking is enabled, a pair in which exactly one element is null is unequal,
 * and a pair in which both elements are null is equal only if nulls are configured to
 * compare equal. Otherwise the values are compared through the primitive type dispatcher,
 * using the type of the left column.
 *
 * @param lhs_row_index Row index in the left table
 * @param rhs_row_index Row index in the right table
 * @return `true` if every pair of corresponding elements is considered equal
 */
__device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const
{
  auto const columns_match = [this, lhs_row_index, rhs_row_index](
                               column_device_view const& left,
                               column_device_view const& right) -> bool {
    if (_has_nulls) {
      bool const left_null  = left.is_null(lhs_row_index);
      bool const right_null = right.is_null(rhs_row_index);
      // Exactly one side is null: the elements can never match.
      if (left_null != right_null) { return false; }
      // Both sides are null: the outcome is decided by the null-equality policy.
      if (left_null) { return _nulls_are_equal == null_equality::EQUAL; }
    }

    // Neither side is null here (or null checking is disabled): compare the stored values.
    return cudf::type_dispatcher<dispatch_primitive_type>(
      left.type(), element_equality_comparator{}, left, right, lhs_row_index, rhs_row_index);
  };

  return thrust::equal(thrust::seq, _lhs.begin(), _lhs.end(), _rhs.begin(), columns_match);
}
174 
184  {
185  return (*this)(static_cast<size_type>(lhs_index), static_cast<size_type>(rhs_index));
186  }
187 
188  private:
189  cudf::nullate::DYNAMIC _has_nulls;
190  table_device_view _lhs;
191  table_device_view _rhs;
192  null_equality _nulls_are_equal;
193 };
194 
203 template <template <typename> class Hash>
205  public:
/**
 * @brief Hashes a single column element with a seeded `Hash<T>`.
 *
 * Only participates in overload resolution when `column_device_view` provides an element
 * accessor for `T`.
 *
 * @tparam T The element type read from the column
 * @param seed Seed used to construct the hash function object
 * @param col Column holding the element
 * @param row_index Index of the element within `col`
 * @return The value produced by `Hash<T>{seed}` for the element
 */
template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(hash_value_type seed,
                                      column_device_view const& col,
                                      size_type row_index) const
{
  Hash<T> seeded_hash{seed};
  return seeded_hash(col.element<T>(row_index));
}
222 
223  // @cond
// Fallback overload chosen when `column_device_view` has no element accessor for `T`.
// All arguments are ignored; reaching this body is reported through CUDF_UNREACHABLE.
template <typename T, CUDF_ENABLE_IF(!column_device_view::has_element_accessor<T>())>
__device__ hash_value_type operator()(hash_value_type, column_device_view const&, size_type) const
{
  CUDF_UNREACHABLE("Unsupported type in hash.");
}
229  // @endcond
230 };
231 
240 template <template <typename> class Hash = cudf::hashing::detail::default_hash>
241 class row_hasher {
242  public:
243  row_hasher() = delete;
244 
254  hash_value_type seed = DEFAULT_HASH_SEED)
255  : _has_nulls{has_nulls}, _table{t}, _seed{seed}
256  {
257  }
258 
267  std::shared_ptr<cudf::experimental::row::equality::preprocessed_table> t,
268  hash_value_type seed = DEFAULT_HASH_SEED)
269  : _has_nulls{has_nulls}, _table{*t}, _seed{seed}
270  {
271  }
272 
/**
 * @brief Computes the hash of the row at `row_index`.
 *
 * Each column contributes one value: the dispatched element hash, or the maximum
 * `hash_value_type` when null checking is enabled and the element is null. The value of
 * column 0 is used directly as the starting hash, so a single-column table never calls
 * `hash_combine`; every later column is folded in with `hash_combine`.
 *
 * @param row_index Index of the row to hash
 * @return The combined hash value of the row
 */
__device__ auto operator()(size_type row_index) const
{
  element_hasher<Hash> hasher;
  auto const null_hash = cuda::std::numeric_limits<hash_value_type>::max();

  // Hash contribution of one column for this row.
  auto const column_hash = [&](size_type col_idx) -> hash_value_type {
    auto const& col = _table.column(col_idx);
    if (_has_nulls && col.is_null(row_index)) { return null_hash; }
    return cudf::type_dispatcher<dispatch_primitive_type>(
      col.type(), hasher, _seed, col, row_index);
  };

  auto hash = column_hash(0);
  for (size_type col_idx = 1; col_idx < _table.num_columns(); ++col_idx) {
    hash = cudf::hashing::detail::hash_combine(hash, column_hash(col_idx));
  }
  return hash;
}
302 
303  private:
304  cudf::nullate::DYNAMIC _has_nulls;
305  table_device_view _table;
306  hash_value_type _seed;
307 };
308 
309 } // namespace row::primitive
310 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
Performs an equality comparison between two elements in two columns.
Function object for computing the hash value of an element in a column.
Performs an equality comparison between two rows in two tables.
row_equality_comparator(cudf::nullate::DYNAMIC const &has_nulls, std::shared_ptr< cudf::experimental::row::equality::preprocessed_table > lhs, std::shared_ptr< cudf::experimental::row::equality::preprocessed_table > rhs, null_equality nulls_are_equal)
Construct a new row equality comparator object.
bool operator()(cudf::experimental::row::lhs_index_type lhs_index, cudf::experimental::row::rhs_index_type rhs_index) const
Compares the specified rows for equality.
bool operator()(size_type lhs_row_index, size_type rhs_row_index) const
Compares the specified rows for equality.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const
Computes the hash value of the row at row_index in the table
row_hasher(cudf::nullate::DYNAMIC const &has_nulls, table_device_view t, hash_value_type seed=DEFAULT_HASH_SEED)
Constructs a row_hasher object with a seed value.
row_hasher(cudf::nullate::DYNAMIC const &has_nulls, std::shared_ptr< cudf::experimental::row::equality::preprocessed_table > t, hash_value_type seed=DEFAULT_HASH_SEED)
Constructs a row_hasher object with a seed value.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Column device view class definitions.
lhs_index_type
Strongly typed index for left-hand side table rows.
rhs_index_type
Strongly typed index for right-hand side table rows.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:154
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: host_udf.hpp:37
bool is_primitive_row_op_compatible(cudf::table_view const &table)
Checks if a table is compatible with primitive row operations.
bool has_nulls(table_view const &view)
Returns True if the table has nulls in any of its columns.
bool equality_compare(Element lhs, Element rhs)
Alias for backward compatibility with legacy row operators.
cuda::std::conditional_t< cudf::is_numeric< T >(), T, void > primitive_type_t
Returns void if it's not a primitive type.
nullate::DYNAMIC defers the determination of nullability to run time rather than compile time....
Custom dispatcher for primitive types.
primitive_type_t< id_to_type< Id > > type
The underlying type.
Table device view class definitions.
Class definitions for (mutable)_table_view
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.