experimental/row_operators.cuh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2022-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
26 #include <cudf/detail/iterator.cuh>
27 #include <cudf/detail/utilities/algorithm.cuh>
28 #include <cudf/detail/utilities/assert.cuh>
29 #include <cudf/hashing/detail/default_hash.cuh>
30 #include <cudf/hashing/detail/hashing.hpp>
31 #include <cudf/lists/detail/dremel.hpp>
32 #include <cudf/lists/list_device_view.cuh>
33 #include <cudf/lists/lists_column_device_view.cuh>
34 #include <cudf/sorting.hpp>
35 #include <cudf/structs/structs_column_device_view.cuh>
41 
42 #include <cuda/std/functional>
43 #include <cuda/std/limits>
44 #include <cuda/std/optional>
45 #include <cuda/std/tuple>
46 #include <cuda/std/utility>
47 #include <thrust/detail/use_default.h>
48 #include <thrust/equal.h>
49 #include <thrust/execution_policy.h>
50 #include <thrust/functional.h>
51 #include <thrust/iterator/counting_iterator.h>
52 #include <thrust/iterator/iterator_adaptor.h>
53 #include <thrust/iterator/iterator_categories.h>
54 #include <thrust/iterator/iterator_facade.h>
55 #include <thrust/iterator/transform_iterator.h>
56 #include <thrust/logical.h>
57 #include <thrust/swap.h>
58 #include <thrust/transform_reduce.h>
59 
60 #include <memory>
61 #include <type_traits>
62 
63 namespace CUDF_EXPORT cudf {
64 
65 namespace row::primitive {
66 class row_equality_comparator; // Forward declaration
67 
68 template <template <typename> class Hash>
69 class row_hasher; // Forward declaration
70 } // namespace row::primitive
71 
72 namespace experimental {
73 
92 template <cudf::type_id t>
95  using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
96 };
97 
98 namespace row {
99 
101 enum class lhs_index_type : size_type {};
103 enum class rhs_index_type : size_type {};
104 
121 template <typename Index, typename Underlying = std::underlying_type_t<Index>>
122 struct strong_index_iterator : public thrust::iterator_facade<strong_index_iterator<Index>,
123  Index,
124  thrust::use_default,
125  thrust::random_access_traversal_tag,
126  Index,
127  Underlying> {
128  using super_t =
129  thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
130 
136  explicit constexpr strong_index_iterator(Underlying n) : begin{n} {}
137 
138  friend class thrust::iterator_core_access;
139 
140  private:
141  __device__ constexpr void increment() { ++begin; }
142  __device__ constexpr void decrement() { --begin; }
143 
144  __device__ constexpr void advance(Underlying n) { begin += n; }
145 
146  __device__ constexpr bool equal(strong_index_iterator<Index> const& other) const noexcept
147  {
148  return begin == other.begin;
149  }
150 
151  __device__ constexpr Index dereference() const noexcept { return static_cast<Index>(begin); }
152 
153  __device__ constexpr Underlying distance_to(
154  strong_index_iterator<Index> const& other) const noexcept
155  {
156  return other.begin - begin;
157  }
158 
159  Underlying begin{};
160 };
161 
166 
171 
172 namespace lexicographic {
173 
192  template <typename Element>
193  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
194  {
195  return detail::compare_elements(lhs, rhs);
196  }
197 };
198 
215  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
216  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
217  {
218  return detail::compare_elements(lhs, rhs);
219  }
220 
228  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
229  __device__ constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
230  {
231  if (isnan(lhs)) {
232  return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
233  } else if (isnan(rhs)) {
234  return weak_ordering::LESS;
235  }
236 
237  return detail::compare_elements(lhs, rhs);
238  }
239 };
240 
242 using optional_dremel_view = cuda::std::optional<detail::dremel_device_view const>;
243 
244 // The has_nested_columns template parameter of the device_row_comparator is
245 // necessary to help the compiler optimize our code. Without it, the list and
246 // struct view specializations are present in the code paths used for primitive
247 // types, and the compiler fails to inline this nearly as well resulting in a
248 // significant performance drop. As a result, there is some minor tension in
249 // the current design between the presence of this parameter and the way that
250 // the Dremel data is passed around, first as a
251 // std::optional<device_span<dremel_device_view>> in the
252 // preprocessed_table/device_row_comparator (which is always valid when
253 // has_nested_columns and is otherwise invalid) that is then unpacked to a
254 // cuda::std::optional<dremel_device_view> at the element_comparator level (which
255 // is always valid for a list column and otherwise invalid). We cannot use an
256 // additional template parameter for the element_comparator on a per-column
257 // basis because we cannot conditionally define dremel_device_view member
258 // variables without jumping through extra hoops with inheritance, so the
259 // cuda::std::optional<dremel_device_view> member must be an optional rather than
260 // a raw dremel_device_view.
294 template <bool has_nested_columns,
295  typename Nullate,
296  typename PhysicalElementComparator = sorting_physical_element_comparator>
298  public:
299  friend class self_comparator;
300  friend class two_table_comparator;
301 
322  Nullate check_nulls,
323  table_device_view lhs,
324  table_device_view rhs,
325  device_span<detail::dremel_device_view const> l_dremel_device_views,
326  device_span<detail::dremel_device_view const> r_dremel_device_views,
327  cuda::std::optional<device_span<int const>> depth = cuda::std::nullopt,
328  cuda::std::optional<device_span<order const>> column_order = cuda::std::nullopt,
329  cuda::std::optional<device_span<null_order const>> null_precedence = cuda::std::nullopt,
330  PhysicalElementComparator comparator = {}) noexcept
331  : _lhs{lhs},
332  _rhs{rhs},
333  _l_dremel(l_dremel_device_views),
334  _r_dremel(r_dremel_device_views),
335  _check_nulls{check_nulls},
336  _depth{depth},
337  _column_order{column_order},
338  _null_precedence{null_precedence},
339  _comparator{comparator}
340  {
341  }
342 
361  template <bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
363  Nullate check_nulls,
364  table_device_view lhs,
365  table_device_view rhs,
366  cuda::std::optional<device_span<order const>> column_order = cuda::std::nullopt,
367  cuda::std::optional<device_span<null_order const>> null_precedence = cuda::std::nullopt,
368  PhysicalElementComparator comparator = {}) noexcept
369  : _lhs{lhs},
370  _rhs{rhs},
371  _l_dremel{},
372  _r_dremel{},
373  _check_nulls{check_nulls},
374  _depth{},
375  _column_order{column_order},
376  _null_precedence{null_precedence},
377  _comparator{comparator}
378  {
379  }
380 
385  public:
402  __device__ element_comparator(Nullate check_nulls,
403  column_device_view lhs,
404  column_device_view rhs,
405  null_order null_precedence = null_order::BEFORE,
406  int depth = 0,
407  PhysicalElementComparator comparator = {},
408  optional_dremel_view l_dremel_device_view = {},
409  optional_dremel_view r_dremel_device_view = {})
410  : _lhs{lhs},
411  _rhs{rhs},
412  _check_nulls{check_nulls},
413  _null_precedence{null_precedence},
414  _depth{depth},
415  _l_dremel_device_view{l_dremel_device_view},
416  _r_dremel_device_view{r_dremel_device_view},
417  _comparator{comparator}
418  {
419  }
420 
429  template <typename Element,
430  CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
431  __device__ cuda::std::pair<weak_ordering, int> operator()(
432  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
433  {
434  if (_check_nulls) {
435  bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
436  bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
437 
438  if (lhs_is_null or rhs_is_null) { // at least one is null
439  return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
440  }
441  }
442 
443  return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
444  _rhs.element<Element>(rhs_element_index)),
445  cuda::std::numeric_limits<int>::max());
446  }
447 
455  template <typename Element,
456  CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
457  (not has_nested_columns or not cudf::is_nested<Element>()))>
458  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type const,
459  size_type const) const noexcept
460  {
461  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
462  }
463 
472  template <typename Element,
473  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
474  __device__ cuda::std::pair<weak_ordering, int> operator()(
475  size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
476  {
477  column_device_view lcol = _lhs;
478  column_device_view rcol = _rhs;
479  int depth = _depth;
480  while (lcol.type().id() == type_id::STRUCT) {
481  bool const lhs_is_null{lcol.is_null(lhs_element_index)};
482  bool const rhs_is_null{rcol.is_null(rhs_element_index)};
483 
484  if (lhs_is_null or rhs_is_null) { // at least one is null
485  weak_ordering state = null_compare(lhs_is_null, rhs_is_null, _null_precedence);
486  return cuda::std::pair(state, depth);
487  }
488 
489  if (lcol.num_child_columns() == 0) {
490  return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
491  }
492 
493  // Non-empty structs have been modified to only have 1 child when using this.
496  ++depth;
497  }
498 
499  return cudf::type_dispatcher<dispatch_void_if_nested>(
500  lcol.type(),
501  element_comparator{_check_nulls, lcol, rcol, _null_precedence, depth, _comparator},
502  lhs_element_index,
503  rhs_element_index);
504  }
505 
514  template <typename Element,
515  CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
516  __device__ cuda::std::pair<weak_ordering, int> operator()(size_type lhs_element_index,
517  size_type rhs_element_index)
518  {
519  // only order top-NULLs according to null_order
520  auto const is_l_row_null = _lhs.is_null(lhs_element_index);
521  auto const is_r_row_null = _rhs.is_null(rhs_element_index);
522  if (is_l_row_null || is_r_row_null) {
523  return cuda::std::pair(null_compare(is_l_row_null, is_r_row_null, _null_precedence),
524  _depth);
525  }
526 
527  // These are all the values from the Dremel encoding.
528  auto const l_max_def_level = _l_dremel_device_view->max_def_level;
529  auto const r_max_def_level = _r_dremel_device_view->max_def_level;
530  auto const l_def_levels = _l_dremel_device_view->def_levels;
531  auto const r_def_levels = _r_dremel_device_view->def_levels;
532  auto const l_rep_levels = _l_dremel_device_view->rep_levels;
533  auto const r_rep_levels = _r_dremel_device_view->rep_levels;
534 
535  // Traverse the nested list hierarchy to get a column device view
536  // pointing to the underlying child data.
537  column_device_view lcol = _lhs.slice(lhs_element_index, 1);
538  column_device_view rcol = _rhs.slice(rhs_element_index, 1);
539 
540  while (lcol.type().id() == type_id::LIST) {
543  }
544 
545  // These start and end values indicate the start and end points of all
546  // the elements of the lists in the current list element
547  // (`[lhs|rhs]_element_index`) that we are comparing.
548  auto const l_offsets = _l_dremel_device_view->offsets;
549  auto const r_offsets = _r_dremel_device_view->offsets;
550  auto l_start = l_offsets[lhs_element_index];
551  auto l_end = l_offsets[lhs_element_index + 1];
552  auto r_start = r_offsets[rhs_element_index];
553  auto r_end = r_offsets[rhs_element_index + 1];
554 
555  // This comparator will be used to compare leaf (non-nested) data types.
556  auto comparator =
557  element_comparator{_check_nulls, lcol, rcol, _null_precedence, _depth, _comparator};
558 
559  // Loop over each element in the encoding. Note that this includes nulls
560  // and empty lists, so not every index corresponds to an actual element
561  // in the child column. The element_index is used to keep track of the current
562  // child element that we're actually comparing.
563  for (int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
564  l_dremel_index < l_end and r_dremel_index < r_end;
565  ++l_dremel_index, ++r_dremel_index) {
566  auto const l_rep_level = l_rep_levels[l_dremel_index];
567  auto const r_rep_level = r_rep_levels[r_dremel_index];
568 
569  // early exit for smaller sub-list
570  if (l_rep_level != r_rep_level) {
571  // the lower repetition level is a smaller sub-list
572  return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
573  : cuda::std::pair(weak_ordering::GREATER, _depth);
574  }
575 
576  // only compare if left and right are at same nesting level
577  auto const l_def_level = l_def_levels[l_dremel_index];
578  auto const r_def_level = r_def_levels[r_dremel_index];
579 
580  // either left or right are empty or NULLs of arbitrary nesting
581  if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
582  // in the fully unraveled version of the list column, only the
583  // most nested NULLs and leafs are present
584  // In this rare condition that we get to the most nested NULL, we increment
585  // element_index because either both rows have a deeply nested NULL at the
586  // same position, and we'll "continue" in our iteration, or we will early
587  // exit if only one of the rows has a deeply nested NULL
588  if ((lcol.nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
589  (rcol.nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
590  ++element_index;
591  }
592  if (l_def_level == r_def_level) { continue; }
593  // We require [] < [NULL] < [leaf] for nested nulls.
594  // The null_precedence only affects top level nulls.
595  return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
596  : cuda::std::pair(weak_ordering::GREATER, _depth);
597  }
598 
599  // finally, compare leaf to leaf
600  weak_ordering state{weak_ordering::EQUIVALENT};
601  int last_null_depth = _depth;
602  cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
603  lcol.type(), comparator, element_index, element_index);
604  if (state != weak_ordering::EQUIVALENT) { return cuda::std::pair(state, _depth); }
605  ++element_index;
606  }
607 
608  // If we have reached this stage, we know that definition levels,
609  // repetition levels, and actual elements are identical in both list
610  // columns up to the `min(l_end - l_start, r_end - r_start)` element of
611  // the Dremel encoding. However, two lists can only compare equivalent if
612  // they are of the same length. Otherwise, the shorter of the two is less
613  // than the longer. This final check determines the appropriate resulting
614  // ordering by checking how many total elements each list is composed of.
615  return cuda::std::pair(detail::compare_elements(l_end - l_start, r_end - r_start), _depth);
616  }
617 
618  private:
619  column_device_view const _lhs;
620  column_device_view const _rhs;
621  Nullate const _check_nulls;
622  null_order const _null_precedence;
623  int const _depth;
624  optional_dremel_view _l_dremel_device_view;
625  optional_dremel_view _r_dremel_device_view;
626  PhysicalElementComparator const _comparator;
627  };
628 
629  public:
639  __device__ constexpr weak_ordering operator()(size_type const lhs_index,
640  size_type const rhs_index) const noexcept
641  {
642  int last_null_depth = cuda::std::numeric_limits<int>::max();
643  size_type list_column_index{-1};
644  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
645  if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
646 
647  int const depth = _depth.has_value() ? (*_depth)[i] : 0;
648  if (depth > last_null_depth) { continue; }
649 
650  bool const ascending =
651  _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING : true;
652 
653  null_order const null_precedence =
654  _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
655 
656  // TODO: At what point do we verify that the columns of lhs and rhs are
657  // all of the same types? I assume that it's already happened before
658  // here, otherwise the current code would be failing.
659  auto const [l_dremel_i, r_dremel_i] =
660  _lhs.column(i).type().id() == type_id::LIST
661  ? cuda::std::make_tuple(optional_dremel_view(_l_dremel[list_column_index]),
662  optional_dremel_view(_r_dremel[list_column_index]))
663  : cuda::std::make_tuple(optional_dremel_view{}, optional_dremel_view{});
664 
665  auto element_comp = element_comparator{_check_nulls,
666  _lhs.column(i),
667  _rhs.column(i),
668  null_precedence,
669  depth,
670  _comparator,
671  l_dremel_i,
672  r_dremel_i};
673 
674  weak_ordering state;
675  cuda::std::tie(state, last_null_depth) =
676  cudf::type_dispatcher(_lhs.column(i).type(), element_comp, lhs_index, rhs_index);
677 
678  if (state == weak_ordering::EQUIVALENT) { continue; }
679 
680  return ascending
681  ? state
682  : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
683  }
684  return weak_ordering::EQUIVALENT;
685  }
686 
687  private:
688  table_device_view const _lhs;
689  table_device_view const _rhs;
692  Nullate const _check_nulls;
693  cuda::std::optional<device_span<int const>> const _depth;
694  cuda::std::optional<device_span<order const>> const _column_order;
695  cuda::std::optional<device_span<null_order const>> const _null_precedence;
696  PhysicalElementComparator const _comparator;
697 }; // class device_row_comparator
698 
712 template <typename Comparator, weak_ordering... values>
713 struct weak_ordering_comparator_impl {
714  static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
715  "weak_ordering_comparator should not be used for pure equality comparisons. The "
716  "`row_equality_comparator` should be used instead");
717 
718  template <typename LhsType, typename RhsType>
719  __device__ constexpr bool operator()(LhsType const lhs_index,
720  RhsType const rhs_index) const noexcept
721  {
722  weak_ordering const result = comparator(lhs_index, rhs_index);
723  return ((result == values) || ...);
724  }
725  Comparator const comparator;
726 };
727 
737 template <typename Comparator>
738 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
744  less_comparator(Comparator const& comparator)
745  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS>{comparator}
746  {
747  }
748 };
749 
760 template <typename Comparator>
762  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
768  less_equivalent_comparator(Comparator const& comparator)
769  : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT>{
770  comparator}
771  {
772  }
773 };
774 
784  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
785 
808  static std::shared_ptr<preprocessed_table> create(table_view const& table,
809  host_span<order const> column_order,
810  host_span<null_order const> null_precedence,
811  rmm::cuda_stream_view stream);
812 
834  static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>> create(
835  table_view const& lhs,
836  table_view const& rhs,
837  host_span<order const> column_order,
838  host_span<null_order const> null_precedence,
839  rmm::cuda_stream_view stream);
840 
841  private:
842  friend class self_comparator;
843  friend class two_table_comparator;
844 
864  static std::shared_ptr<preprocessed_table> create(
865  table_view const& preprocessed_input,
866  std::vector<int>&& verticalized_col_depths,
867  std::vector<std::unique_ptr<column>>&& transformed_columns,
868  host_span<order const> column_order,
869  host_span<null_order const> null_precedence,
870  bool has_ranked_children,
871  rmm::cuda_stream_view stream);
872 
900  rmm::device_uvector<order>&& column_order,
901  rmm::device_uvector<null_order>&& null_precedence,
903  std::vector<detail::dremel_data>&& dremel_data,
905  std::vector<std::unique_ptr<column>>&& transformed_columns,
906  bool has_ranked_children);
907 
909  rmm::device_uvector<order>&& column_order,
910  rmm::device_uvector<null_order>&& null_precedence,
912  std::vector<std::unique_ptr<column>>&& transformed_columns,
913  bool has_ranked_children);
914 
920  operator table_device_view() { return *_t; }
921 
928  [[nodiscard]] cuda::std::optional<device_span<order const>> column_order() const
929  {
930  return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
931  : cuda::std::nullopt;
932  }
933 
941  [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence() const
942  {
943  return _null_precedence.size()
944  ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
945  : cuda::std::nullopt;
946  }
947 
956  [[nodiscard]] cuda::std::optional<device_span<int const>> depths() const
957  {
958  return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
959  : cuda::std::nullopt;
960  }
961 
962  [[nodiscard]] device_span<detail::dremel_device_view const> dremel_device_views() const
963  {
964  if (_dremel_device_views.has_value()) {
965  return device_span<detail::dremel_device_view const>(*_dremel_device_views);
966  } else {
967  return {};
968  }
969  }
970 
971  template <typename PhysicalElementComparator>
972  void check_physical_element_comparator()
973  {
974  if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
975  CUDF_EXPECTS(!_has_ranked_children,
976  "The input table has nested type children and they were transformed using a "
977  "different type of physical element comparator.");
978  }
979  }
980 
981  private:
982  table_device_view_owner const _t;
983  rmm::device_uvector<order> const _column_order;
984  rmm::device_uvector<null_order> const _null_precedence;
985  rmm::device_uvector<size_type> const _depths;
986 
987  // Dremel encoding of list columns used for the comparison algorithm
988  cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
989  cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
990 
991  // Intermediate columns generated from transforming nested children columns into
992  // integers columns using `cudf::rank()`, need to be kept alive.
993  std::vector<std::unique_ptr<column>> _transformed_columns;
994 
995  // Flag to record if the input table was preprocessed to transform any nested children column(s)
996  // into integer column(s) using `cudf::rank`.
997  bool const _has_ranked_children;
998 };
999 
1017  public:
1033  host_span<order const> column_order = {},
1034  host_span<null_order const> null_precedence = {},
1036  : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
1037  {
1038  }
1039 
1049  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1050 
1079  template <bool has_nested_columns,
1080  typename Nullate,
1081  typename PhysicalElementComparator = sorting_physical_element_comparator>
1082  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1083  {
1084  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1085 
1086  return less_comparator{
1087  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1088  nullate,
1089  *d_t,
1090  *d_t,
1091  d_t->dremel_device_views(),
1092  d_t->dremel_device_views(),
1093  d_t->depths(),
1094  d_t->column_order(),
1095  d_t->null_precedence(),
1096  comparator}};
1097  }
1098 
1100  template <bool has_nested_columns,
1101  typename Nullate,
1102  typename PhysicalElementComparator = sorting_physical_element_comparator>
1103  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1104  {
1105  d_t->check_physical_element_comparator<PhysicalElementComparator>();
1106 
1107  return less_equivalent_comparator{
1108  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1109  nullate,
1110  *d_t,
1111  *d_t,
1112  d_t->dremel_device_views(),
1113  d_t->dremel_device_views(),
1114  d_t->depths(),
1115  d_t->column_order(),
1116  d_t->null_precedence(),
1117  comparator}};
1118  }
1119 
1120  private:
1121  std::shared_ptr<preprocessed_table> d_t;
1122 };
1123 
1124 // @cond
1125 template <typename Comparator>
1126 struct strong_index_comparator_adapter {
1127  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1128 
1129  __device__ constexpr weak_ordering operator()(lhs_index_type const lhs_index,
1130  rhs_index_type const rhs_index) const noexcept
1131  {
1132  return comparator(static_cast<cudf::size_type>(lhs_index),
1133  static_cast<cudf::size_type>(rhs_index));
1134  }
1135 
1136  __device__ constexpr weak_ordering operator()(rhs_index_type const rhs_index,
1137  lhs_index_type const lhs_index) const noexcept
1138  {
1139  auto const left_right_ordering =
1140  comparator(static_cast<cudf::size_type>(lhs_index), static_cast<cudf::size_type>(rhs_index));
1141 
1142  // Invert less/greater values to reflect right to left ordering
1143  if (left_right_ordering == weak_ordering::LESS) {
1144  return weak_ordering::GREATER;
1145  } else if (left_right_ordering == weak_ordering::GREATER) {
1146  return weak_ordering::LESS;
1147  }
1148  return weak_ordering::EQUIVALENT;
1149  }
1150 
1151  Comparator const comparator;
1152 };
1153 // @endcond
1154 
1173  public:
1193  table_view const& right,
1194  host_span<order const> column_order = {},
1195  host_span<null_order const> null_precedence = {},
1197 
1212  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1213  std::shared_ptr<preprocessed_table> right)
1214  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1215  {
1216  }
1217 
1254  template <bool has_nested_columns,
1255  typename Nullate,
1256  typename PhysicalElementComparator = sorting_physical_element_comparator>
1257  auto less(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1258  {
1259  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1260  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1261 
1262  return less_comparator{strong_index_comparator_adapter{
1263  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1264  nullate,
1265  *d_left_table,
1266  *d_right_table,
1267  d_left_table->dremel_device_views(),
1268  d_right_table->dremel_device_views(),
1269  d_left_table->depths(),
1270  d_left_table->column_order(),
1271  d_left_table->null_precedence(),
1272  comparator}}};
1273  }
1274 
1276  template <bool has_nested_columns,
1277  typename Nullate,
1278  typename PhysicalElementComparator = sorting_physical_element_comparator>
1279  auto less_equivalent(Nullate nullate = {}, PhysicalElementComparator comparator = {}) const
1280  {
1281  d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1282  d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1283 
1284  return less_equivalent_comparator{strong_index_comparator_adapter{
1285  device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1286  nullate,
1287  *d_left_table,
1288  *d_right_table,
1289  d_left_table->dremel_device_views(),
1290  d_right_table->dremel_device_views(),
1291  d_left_table->depths(),
1292  d_left_table->column_order(),
1293  d_left_table->null_precedence(),
1294  comparator}}};
1295  }
1296 
1297  private:
1298  std::shared_ptr<preprocessed_table> d_left_table;
1299  std::shared_ptr<preprocessed_table> d_right_table;
1300 };
1301 
1302 } // namespace lexicographic
1303 
1304 namespace hash {
1305 class row_hasher;
1306 } // namespace hash
1307 
1308 namespace equality {
1309 
1328  template <typename Element>
1329  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1330  {
1331  return lhs == rhs;
1332  }
1333 };
1334 
1350  template <typename Element, CUDF_ENABLE_IF(not std::is_floating_point_v<Element>)>
1351  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1352  {
1353  return lhs == rhs;
1354  }
1355 
1365  template <typename Element, CUDF_ENABLE_IF(std::is_floating_point_v<Element>)>
1366  __device__ constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
1367  {
1368  return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1369  }
1370 };
1371 
1396 template <bool has_nested_columns,
1397  typename Nullate,
1398  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1400  friend class self_comparator;
1401  friend class two_table_comparator;
1402 
1403  public:
1412  __device__ constexpr bool operator()(size_type const lhs_index,
1413  size_type const rhs_index) const noexcept
1414  {
1415  auto equal_elements = [lhs_index, rhs_index, this](column_device_view l, column_device_view r) {
1416  return cudf::type_dispatcher(
1417  l.type(),
1418  element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1419  lhs_index,
1420  rhs_index);
1421  };
1422 
1423  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
1424  }
1425 
1426  private:
1437  device_row_comparator(Nullate check_nulls,
1438  table_device_view lhs,
1439  table_device_view rhs,
1440  null_equality nulls_are_equal = null_equality::EQUAL,
1441  PhysicalEqualityComparator comparator = {}) noexcept
1442  : lhs{lhs},
1443  rhs{rhs},
1444  check_nulls{check_nulls},
1445  nulls_are_equal{nulls_are_equal},
1446  comparator{comparator}
1447  {
1448  }
1449 
1453  class element_comparator {
1454  public:
1467  __device__ element_comparator(Nullate check_nulls,
1468  column_device_view lhs,
1469  column_device_view rhs,
1470  null_equality nulls_are_equal = null_equality::EQUAL,
1471  PhysicalEqualityComparator comparator = {}) noexcept
1472  : lhs{lhs},
1473  rhs{rhs},
1474  check_nulls{check_nulls},
1475  nulls_are_equal{nulls_are_equal},
1476  comparator{comparator}
1477  {
1478  }
1479 
1488  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1489  __device__ bool operator()(size_type const lhs_element_index,
1490  size_type const rhs_element_index) const noexcept
1491  {
1492  if (check_nulls) {
1493  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1494  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1495  if (lhs_is_null and rhs_is_null) {
1496  return nulls_are_equal == null_equality::EQUAL;
1497  } else if (lhs_is_null != rhs_is_null) {
1498  return false;
1499  }
1500  }
1501 
1502  return comparator(lhs.element<Element>(lhs_element_index),
1503  rhs.element<Element>(rhs_element_index));
1504  }
1505 
1506  template <typename Element,
1507  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1508  (not has_nested_columns or not cudf::is_nested<Element>())),
1509  typename... Args>
1510  __device__ bool operator()(Args...)
1511  {
1512  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1513  }
1514 
1515  template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1516  __device__ bool operator()(size_type const lhs_element_index,
1517  size_type const rhs_element_index) const noexcept
1518  {
1519  column_device_view lcol = lhs.slice(lhs_element_index, 1);
1520  column_device_view rcol = rhs.slice(rhs_element_index, 1);
1521  while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1522  if (check_nulls) {
1523  auto lvalid = detail::make_validity_iterator<true>(lcol);
1524  auto rvalid = detail::make_validity_iterator<true>(rcol);
1525  if (nulls_are_equal == null_equality::UNEQUAL) {
1526  if (thrust::any_of(
1527  thrust::seq, lvalid, lvalid + lcol.size(), cuda::std::logical_not<bool>()) or
1528  thrust::any_of(
1529  thrust::seq, rvalid, rvalid + rcol.size(), cuda::std::logical_not<bool>())) {
1530  return false;
1531  }
1532  } else {
1533  if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1534  return false;
1535  }
1536  }
1537  }
1538  if (lcol.type().id() == type_id::STRUCT) {
1539  if (lcol.num_child_columns() == 0) { return true; }
1540  // Non-empty structs are assumed to be decomposed and contain only one child
1541  lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1542  rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1543  } else if (lcol.type().id() == type_id::LIST) {
1544  auto l_list_col = detail::lists_column_device_view(lcol);
1545  auto r_list_col = detail::lists_column_device_view(rcol);
1546 
1547  auto lsizes = make_list_size_iterator(l_list_col);
1548  auto rsizes = make_list_size_iterator(r_list_col);
1549  if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1550  return false;
1551  }
1552 
1553  lcol = l_list_col.get_sliced_child();
1554  rcol = r_list_col.get_sliced_child();
1555  if (lcol.size() != rcol.size()) { return false; }
1556  }
1557  }
1558 
1559  auto comp = column_comparator{
1560  element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1561  return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1562  }
1563 
1564  private:
1572  struct column_comparator {
1573  element_comparator const comp;
1574  size_type const size;
1575 
1581  template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1582  __device__ bool operator()() const noexcept
1583  {
1584  return thrust::all_of(thrust::seq,
1585  thrust::make_counting_iterator(0),
1586  thrust::make_counting_iterator(0) + size,
1587  [this](auto i) { return comp.template operator()<Element>(i, i); });
1588  }
1589 
1590  template <typename Element,
1591  CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1592  typename... Args>
1593  __device__ bool operator()(Args...) const noexcept
1594  {
1595  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
1596  }
1597  };
1598 
1599  column_device_view const lhs;
1600  column_device_view const rhs;
1601  Nullate const check_nulls;
1602  null_equality const nulls_are_equal;
1603  PhysicalEqualityComparator const comparator;
1604  };
1605 
1606  table_device_view const lhs;
1607  table_device_view const rhs;
1608  Nullate const check_nulls;
1609  null_equality const nulls_are_equal;
1610  PhysicalEqualityComparator const comparator;
1611 };
1612 
1632  static std::shared_ptr<preprocessed_table> create(table_view const& table,
1633  rmm::cuda_stream_view stream);
1634 
1635  private:
1636  friend class self_comparator;
1637  friend class two_table_comparator;
1638  friend class hash::row_hasher;
1640  friend class ::cudf::row::primitive::row_equality_comparator;
1641 
1642  template <template <typename> class Hash>
1643  friend class ::cudf::row::primitive::row_hasher;
1644 
1645  using table_device_view_owner =
1646  std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
1647 
1648  preprocessed_table(table_device_view_owner&& table,
1649  std::vector<rmm::device_buffer>&& null_buffers,
1650  std::vector<std::unique_ptr<column>>&& tmp_columns)
1651  : _t(std::move(table)),
1652  _null_buffers(std::move(null_buffers)),
1653  _tmp_columns(std::move(tmp_columns))
1654  {
1655  }
1656 
1662  operator table_device_view() { return *_t; }
1663 
1664  table_device_view_owner _t;
1665  std::vector<rmm::device_buffer> _null_buffers;
1666  std::vector<std::unique_ptr<column>> _tmp_columns;
1667 };
1668 
1676  public:
1686  : d_t(preprocessed_table::create(t, stream))
1687  {
1688  }
1689 
1699  self_comparator(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
1700 
1726  template <bool has_nested_columns,
1727  typename Nullate,
1728  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1729  auto equal_to(Nullate nullate = {},
1730  null_equality nulls_are_equal = null_equality::EQUAL,
1731  PhysicalEqualityComparator comparator = {}) const noexcept
1732  {
1733  return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1734  nullate, *d_t, *d_t, nulls_are_equal, comparator};
1735  }
1736 
1737  private:
1738  std::shared_ptr<preprocessed_table> d_t;
1739 };
1740 
1741 // @cond
1742 template <typename Comparator>
1743 struct strong_index_comparator_adapter {
1744  strong_index_comparator_adapter(Comparator const& comparator) : comparator{comparator} {}
1745 
1746  __device__ constexpr bool operator()(lhs_index_type const lhs_index,
1747  rhs_index_type const rhs_index) const noexcept
1748  {
1749  return comparator(static_cast<cudf::size_type>(lhs_index),
1750  static_cast<cudf::size_type>(rhs_index));
1751  }
1752 
1753  __device__ constexpr bool operator()(rhs_index_type const rhs_index,
1754  lhs_index_type const lhs_index) const noexcept
1755  {
1756  return this->operator()(lhs_index, rhs_index);
1757  }
1758 
1759  Comparator const comparator;
1760 };
1761 // @endcond
1762 
1780  public:
1794  table_view const& right,
1795  rmm::cuda_stream_view stream);
1796 
1807  two_table_comparator(std::shared_ptr<preprocessed_table> left,
1808  std::shared_ptr<preprocessed_table> right)
1809  : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1810  {
1811  }
1812 
1843  template <bool has_nested_columns,
1844  typename Nullate,
1845  typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1846  auto equal_to(Nullate nullate = {},
1847  null_equality nulls_are_equal = null_equality::EQUAL,
1848  PhysicalEqualityComparator comparator = {}) const noexcept
1849  {
1850  return strong_index_comparator_adapter{
1851  device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1852  nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1853  }
1854 
1855  private:
1856  std::shared_ptr<preprocessed_table> d_left_table;
1857  std::shared_ptr<preprocessed_table> d_right_table;
1858 };
1859 
1860 } // namespace equality
1861 
1862 namespace hash {
1863 
1873 template <template <typename> class hash_function, typename Nullate>
1875  public:
1883  __device__ element_hasher(
1884  Nullate nulls,
1885  uint32_t seed = DEFAULT_HASH_SEED,
1886  hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1887  : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1888  {
1889  }
1890 
1899  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1900  __device__ hash_value_type operator()(column_device_view const& col,
1901  size_type row_index) const noexcept
1902  {
1903  if (_check_nulls && col.is_null(row_index)) { return _null_hash; }
1904  return hash_function<T>{_seed}(col.element<T>(row_index));
1905  }
1906 
1915  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1916  __device__ hash_value_type operator()(column_device_view const& col,
1917  size_type row_index) const noexcept
1918  {
1919  CUDF_UNREACHABLE("Unsupported type in hash.");
1920  }
1921 
1922  Nullate _check_nulls;
1923  uint32_t _seed;
1925 };
1926 
1936 template <template <typename> class hash_function, typename Nullate>
1938  friend class row_hasher;
1939 
1940  public:
1947  __device__ auto operator()(size_type row_index) const noexcept
1948  {
1949  auto it =
1950  thrust::make_transform_iterator(_table.begin(), [row_index, this](auto const& column) {
1951  return cudf::type_dispatcher<dispatch_storage_type>(
1952  column.type(),
1953  element_hasher_adapter<hash_function>{_check_nulls, _seed},
1954  column,
1955  row_index);
1956  });
1957 
1958  // Hash each element and combine all the hash values together
1959  return detail::accumulate(it, it + _table.num_columns(), _seed, [](auto hash, auto h) {
1960  return cudf::hashing::detail::hash_combine(hash, h);
1961  });
1962  }
1963 
1964  private:
1972  template <template <typename> class hash_fn>
1973  class element_hasher_adapter {
1974  static constexpr hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1975  static constexpr hash_value_type NON_NULL_HASH = 0;
1976 
1977  public:
1978  __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1979  : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1980  {
1981  }
1982 
1983  template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1984  __device__ hash_value_type operator()(column_device_view const& col,
1985  size_type row_index) const noexcept
1986  {
1987  return _element_hasher.template operator()<T>(col, row_index);
1988  }
1989 
1990  template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1991  __device__ hash_value_type operator()(column_device_view const& col,
1992  size_type row_index) const noexcept
1993  {
1994  auto hash = hash_value_type{0};
1995  column_device_view curr_col = col.slice(row_index, 1);
1996  while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1997  if (_check_nulls) {
1998  auto validity_it = detail::make_validity_iterator<true>(curr_col);
1999  hash = detail::accumulate(
2000  validity_it, validity_it + curr_col.size(), hash, [](auto hash, auto is_valid) {
2001  return cudf::hashing::detail::hash_combine(hash,
2002  is_valid ? NON_NULL_HASH : NULL_HASH);
2003  });
2004  }
2005  if (curr_col.type().id() == type_id::STRUCT) {
2006  if (curr_col.num_child_columns() == 0) { return hash; }
2007  // Non-empty structs are assumed to be decomposed and contain only one child
2008  curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
2009  } else if (curr_col.type().id() == type_id::LIST) {
2010  auto list_col = detail::lists_column_device_view(curr_col);
2011  auto list_sizes = make_list_size_iterator(list_col);
2012  hash = detail::accumulate(
2013  list_sizes, list_sizes + list_col.size(), hash, [](auto hash, auto size) {
2014  return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
2015  });
2016  curr_col = list_col.get_sliced_child();
2017  }
2018  }
2019  for (int i = 0; i < curr_col.size(); ++i) {
2020  hash = cudf::hashing::detail::hash_combine(
2021  hash,
2022  type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
2023  }
2024  return hash;
2025  }
2026 
2027  element_hasher<hash_fn, Nullate> const _element_hasher;
2028  Nullate const _check_nulls;
2029  };
2030 
2031  CUDF_HOST_DEVICE device_row_hasher(Nullate check_nulls,
2032  table_device_view t,
2033  uint32_t seed = DEFAULT_HASH_SEED) noexcept
2034  : _check_nulls{check_nulls}, _table{t}, _seed(seed)
2035  {
2036  }
2037 
2038  Nullate const _check_nulls;
2039  table_device_view const _table;
2040  uint32_t const _seed;
2041 };
2042 
2043 // Inject row::equality::preprocessed_table into the row::hash namespace
2044 // As a result, row::equality::preprocessed_table and row::hash::preprocessed table are the same
2045 // type and are interchangeable.
2048 
2055 class row_hasher {
2056  public:
2065  : d_t(preprocessed_table::create(t, stream))
2066  {
2067  }
2068 
2078  row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2079 
2092  template <
2093  template <typename> class hash_function = cudf::hashing::detail::default_hash,
2094  template <template <typename> class, typename> class DeviceRowHasher = device_row_hasher,
2095  typename Nullate>
2096  DeviceRowHasher<hash_function, Nullate> device_hasher(Nullate nullate = {},
2097  uint32_t seed = DEFAULT_HASH_SEED) const
2098  {
2099  return DeviceRowHasher<hash_function, Nullate>(nullate, *d_t, seed);
2100  }
2101 
2102  private:
2103  std::shared_ptr<preprocessed_table> d_t;
2104 };
2105 
2106 } // namespace hash
2107 
2108 } // namespace row
2109 
2110 } // namespace experimental
2111 } // namespace CUDF_EXPORT cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
Definition: types.hpp:293
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Column device view class definitions.
lhs_index_type
Strongly typed index for left-hand side table rows.
cuda::std::optional< detail::dremel_device_view const > optional_dremel_view
Optional dremel device view for handling nested column structures.
rhs_index_type
Strongly typed index for right-hand side table rows.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:29
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:154
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:159
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:50
cuDF interfaces
Definition: host_udf.hpp:37
weak_ordering compare_elements(Element lhs, Element rhs)
Alias for backward compatibility with legacy row operators.
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Alias for backward compatibility with legacy row operators.
cudf::detail::weak_ordering weak_ordering
Alias for backward compatibility with legacy row operators.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
DEPRECATED: This header is deprecated as of 25.10 and will be removed in 25.12.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:355
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Definition: span.hpp:194
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32