26 #include <cudf/detail/iterator.cuh>
27 #include <cudf/detail/utilities/algorithm.cuh>
28 #include <cudf/detail/utilities/assert.cuh>
29 #include <cudf/hashing/detail/default_hash.cuh>
30 #include <cudf/hashing/detail/hashing.hpp>
31 #include <cudf/lists/detail/dremel.hpp>
32 #include <cudf/lists/list_device_view.cuh>
33 #include <cudf/lists/lists_column_device_view.cuh>
35 #include <cudf/structs/structs_column_device_view.cuh>
42 #include <cuda/std/functional>
43 #include <cuda/std/limits>
44 #include <cuda/std/optional>
45 #include <cuda/std/tuple>
46 #include <cuda/std/utility>
47 #include <thrust/detail/use_default.h>
48 #include <thrust/equal.h>
49 #include <thrust/execution_policy.h>
50 #include <thrust/functional.h>
51 #include <thrust/iterator/counting_iterator.h>
52 #include <thrust/iterator/iterator_adaptor.h>
53 #include <thrust/iterator/iterator_categories.h>
54 #include <thrust/iterator/iterator_facade.h>
55 #include <thrust/iterator/transform_iterator.h>
56 #include <thrust/logical.h>
57 #include <thrust/swap.h>
58 #include <thrust/transform_reduce.h>
61 #include <type_traits>
63 namespace CUDF_EXPORT
cudf {
65 namespace row::primitive {
66 class row_equality_comparator;
68 template <
template <
typename>
class Hash>
72 namespace experimental {
92 template <cudf::type_
id t>
95 using type = std::conditional_t<t == type_id::STRUCT or t == type_id::LIST, void, id_to_type<t>>;
121 template <
typename Index,
typename Underlying = std::underlying_type_t<Index>>
125 thrust::random_access_traversal_tag,
129 thrust::iterator_adaptor<strong_index_iterator<Index>, Index>;
138 friend class thrust::iterator_core_access;
141 __device__ constexpr
void increment() { ++begin; }
142 __device__ constexpr
void decrement() { --begin; }
144 __device__ constexpr
void advance(Underlying n) { begin += n; }
146 __device__ constexpr
bool equal(strong_index_iterator<Index>
const& other)
const noexcept
148 return begin == other.begin;
151 __device__ constexpr Index dereference() const noexcept {
return static_cast<Index
>(begin); }
153 __device__ constexpr Underlying distance_to(
154 strong_index_iterator<Index>
const& other)
const noexcept
156 return other.begin - begin;
172 namespace lexicographic {
192 template <
typename Element>
215 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
228 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
232 return isnan(rhs) ? weak_ordering::EQUIVALENT : weak_ordering::GREATER;
233 }
else if (isnan(rhs)) {
234 return weak_ordering::LESS;
294 template <
bool has_nested_columns,
330 PhysicalElementComparator comparator = {}) noexcept
333 _l_dremel(l_dremel_device_views),
334 _r_dremel(r_dremel_device_views),
335 _check_nulls{check_nulls},
337 _column_order{column_order},
338 _null_precedence{null_precedence},
339 _comparator{comparator}
361 template <
bool nested_disable = not has_nested_columns, CUDF_ENABLE_IF(nested_disable)>
368 PhysicalElementComparator comparator = {}) noexcept
373 _check_nulls{check_nulls},
375 _column_order{column_order},
376 _null_precedence{null_precedence},
377 _comparator{comparator}
405 null_order null_precedence = null_order::BEFORE,
407 PhysicalElementComparator comparator = {},
412 _check_nulls{check_nulls},
413 _null_precedence{null_precedence},
415 _l_dremel_device_view{l_dremel_device_view},
416 _r_dremel_device_view{r_dremel_device_view},
417 _comparator{comparator}
429 template <
typename Element,
430 CUDF_ENABLE_IF(cudf::is_relationally_comparable<Element, Element>())>
435 bool const lhs_is_null{_lhs.is_null(lhs_element_index)};
436 bool const rhs_is_null{_rhs.is_null(rhs_element_index)};
438 if (lhs_is_null or rhs_is_null) {
439 return cuda::std::pair(
null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth);
443 return cuda::std::pair(_comparator(_lhs.element<Element>(lhs_element_index),
444 _rhs.element<Element>(rhs_element_index)),
445 cuda::std::numeric_limits<int>::max());
455 template <
typename Element,
456 CUDF_ENABLE_IF(not cudf::is_relationally_comparable<Element, Element>() and
457 (not has_nested_columns or not cudf::is_nested<Element>()))>
461 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
472 template <
typename Element,
473 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::struct_view>)>
480 while (lcol.
type().
id() == type_id::STRUCT) {
481 bool const lhs_is_null{lcol.
is_null(lhs_element_index)};
482 bool const rhs_is_null{rcol.
is_null(rhs_element_index)};
484 if (lhs_is_null or rhs_is_null) {
486 return cuda::std::pair(state, depth);
490 return cuda::std::pair(weak_ordering::EQUIVALENT, cuda::std::numeric_limits<int>::max());
499 return cudf::type_dispatcher<dispatch_void_if_nested>(
514 template <
typename Element,
515 CUDF_ENABLE_IF(has_nested_columns and std::is_same_v<Element, cudf::list_view>)>
520 auto const is_l_row_null = _lhs.is_null(lhs_element_index);
521 auto const is_r_row_null = _rhs.is_null(rhs_element_index);
522 if (is_l_row_null || is_r_row_null) {
523 return cuda::std::pair(
null_compare(is_l_row_null, is_r_row_null, _null_precedence),
528 auto const l_max_def_level = _l_dremel_device_view->max_def_level;
529 auto const r_max_def_level = _r_dremel_device_view->max_def_level;
530 auto const l_def_levels = _l_dremel_device_view->def_levels;
531 auto const r_def_levels = _r_dremel_device_view->def_levels;
532 auto const l_rep_levels = _l_dremel_device_view->rep_levels;
533 auto const r_rep_levels = _r_dremel_device_view->rep_levels;
540 while (lcol.
type().
id() == type_id::LIST) {
548 auto const l_offsets = _l_dremel_device_view->offsets;
549 auto const r_offsets = _r_dremel_device_view->offsets;
550 auto l_start = l_offsets[lhs_element_index];
551 auto l_end = l_offsets[lhs_element_index + 1];
552 auto r_start = r_offsets[rhs_element_index];
553 auto r_end = r_offsets[rhs_element_index + 1];
563 for (
int l_dremel_index = l_start, r_dremel_index = r_start, element_index = 0;
564 l_dremel_index < l_end and r_dremel_index < r_end;
565 ++l_dremel_index, ++r_dremel_index) {
566 auto const l_rep_level = l_rep_levels[l_dremel_index];
567 auto const r_rep_level = r_rep_levels[r_dremel_index];
570 if (l_rep_level != r_rep_level) {
572 return l_rep_level < r_rep_level ? cuda::std::pair(weak_ordering::LESS, _depth)
573 : cuda::std::pair(weak_ordering::GREATER, _depth);
577 auto const l_def_level = l_def_levels[l_dremel_index];
578 auto const r_def_level = r_def_levels[r_dremel_index];
581 if (l_def_level < l_max_def_level || r_def_level < r_max_def_level) {
588 if ((lcol.
nullable() and l_def_levels[l_dremel_index] == l_max_def_level - 1) or
589 (rcol.
nullable() and r_def_levels[r_dremel_index] == r_max_def_level - 1)) {
592 if (l_def_level == r_def_level) {
continue; }
595 return l_def_level < r_def_level ? cuda::std::pair(weak_ordering::LESS, _depth)
596 : cuda::std::pair(weak_ordering::GREATER, _depth);
601 int last_null_depth = _depth;
602 cuda::std::tie(state, last_null_depth) = cudf::type_dispatcher<dispatch_void_if_nested>(
603 lcol.
type(), comparator, element_index, element_index);
604 if (state != weak_ordering::EQUIVALENT) {
return cuda::std::pair(state, _depth); }
621 Nullate
const _check_nulls;
626 PhysicalElementComparator
const _comparator;
640 size_type const rhs_index)
const noexcept
642 int last_null_depth = cuda::std::numeric_limits<int>::max();
644 for (
size_type i = 0; i < _lhs.num_columns(); ++i) {
645 if (_lhs.column(i).type().id() == type_id::LIST) { ++list_column_index; }
647 int const depth = _depth.has_value() ? (*_depth)[i] : 0;
648 if (depth > last_null_depth) {
continue; }
650 bool const ascending =
651 _column_order.has_value() ? (*_column_order)[i] == order::ASCENDING :
true;
654 _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
659 auto const [l_dremel_i, r_dremel_i] =
660 _lhs.column(i).type().id() == type_id::LIST
675 cuda::std::tie(state, last_null_depth) =
678 if (state == weak_ordering::EQUIVALENT) {
continue; }
682 : (state == weak_ordering::GREATER ? weak_ordering::LESS : weak_ordering::GREATER);
684 return weak_ordering::EQUIVALENT;
692 Nullate
const _check_nulls;
693 cuda::std::optional<device_span<int const>>
const _depth;
694 cuda::std::optional<device_span<order const>>
const _column_order;
695 cuda::std::optional<device_span<null_order const>>
const _null_precedence;
696 PhysicalElementComparator
const _comparator;
713 struct weak_ordering_comparator_impl {
714 static_assert(not((weak_ordering::EQUIVALENT == values) && ...),
715 "weak_ordering_comparator should not be used for pure equality comparisons. The "
716 "`row_equality_comparator` should be used instead");
718 template <
typename LhsType,
typename RhsType>
719 __device__ constexpr
bool operator()(LhsType
const lhs_index,
720 RhsType
const rhs_index)
const noexcept
722 weak_ordering const result = comparator(lhs_index, rhs_index);
723 return ((result == values) || ...);
725 Comparator
const comparator;
737 template <
typename Comparator>
738 struct less_comparator : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS> {
745 : weak_ordering_comparator_impl<Comparator,
weak_ordering::LESS>{comparator}
760 template <
typename Comparator>
762 : weak_ordering_comparator_impl<Comparator, weak_ordering::LESS, weak_ordering::EQUIVALENT> {
834 static std::pair<std::shared_ptr<preprocessed_table>, std::shared_ptr<preprocessed_table>>
create(
864 static std::shared_ptr<preprocessed_table> create(
866 std::vector<int>&& verticalized_col_depths,
867 std::vector<std::unique_ptr<column>>&& transformed_columns,
870 bool has_ranked_children,
903 std::vector<detail::dremel_data>&& dremel_data,
905 std::vector<std::unique_ptr<column>>&& transformed_columns,
906 bool has_ranked_children);
912 std::vector<std::unique_ptr<column>>&& transformed_columns,
913 bool has_ranked_children);
928 [[nodiscard]] cuda::std::optional<device_span<order const>> column_order()
const
930 return _column_order.size() ? cuda::std::optional<device_span<order const>>(_column_order)
931 : cuda::std::nullopt;
941 [[nodiscard]] cuda::std::optional<device_span<null_order const>> null_precedence()
const
943 return _null_precedence.size()
944 ? cuda::std::optional<device_span<null_order const>>(_null_precedence)
945 : cuda::std::nullopt;
956 [[nodiscard]] cuda::std::optional<device_span<int const>> depths()
const
958 return _depths.size() ? cuda::std::optional<device_span<int const>>(_depths)
959 : cuda::std::nullopt;
964 if (_dremel_device_views.has_value()) {
971 template <
typename PhysicalElementComparator>
972 void check_physical_element_comparator()
974 if constexpr (!std::is_same_v<PhysicalElementComparator, sorting_physical_element_comparator>) {
976 "The input table has nested type children and they were transformed using a "
977 "different type of physical element comparator.");
982 table_device_view_owner
const _t;
988 cuda::std::optional<std::vector<detail::dremel_data>> _dremel_data;
989 cuda::std::optional<rmm::device_uvector<detail::dremel_device_view>> _dremel_device_views;
993 std::vector<std::unique_ptr<column>> _transformed_columns;
997 bool const _has_ranked_children;
1036 : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
1079 template <
bool has_nested_columns,
1082 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1084 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1086 return less_comparator{
1087 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1091 d_t->dremel_device_views(),
1092 d_t->dremel_device_views(),
1094 d_t->column_order(),
1095 d_t->null_precedence(),
1100 template <
bool has_nested_columns,
1102 typename PhysicalElementComparator = sorting_physical_element_comparator>
1105 d_t->check_physical_element_comparator<PhysicalElementComparator>();
1107 return less_equivalent_comparator{
1108 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1112 d_t->dremel_device_views(),
1113 d_t->dremel_device_views(),
1115 d_t->column_order(),
1116 d_t->null_precedence(),
1121 std::shared_ptr<preprocessed_table> d_t;
1125 template <
typename Comparator>
1126 struct strong_index_comparator_adapter {
1127 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1129 __device__ constexpr
weak_ordering operator()(lhs_index_type
const lhs_index,
1130 rhs_index_type
const rhs_index)
const noexcept
1136 __device__ constexpr
weak_ordering operator()(rhs_index_type
const rhs_index,
1137 lhs_index_type
const lhs_index)
const noexcept
1139 auto const left_right_ordering =
1143 if (left_right_ordering == weak_ordering::LESS) {
1144 return weak_ordering::GREATER;
1145 }
else if (left_right_ordering == weak_ordering::GREATER) {
1146 return weak_ordering::LESS;
1148 return weak_ordering::EQUIVALENT;
1151 Comparator
const comparator;
1213 std::shared_ptr<preprocessed_table> right)
1214 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1254 template <
bool has_nested_columns,
1257 auto less(Nullate
nullate = {}, PhysicalElementComparator comparator = {})
const
1259 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1260 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1262 return less_comparator{strong_index_comparator_adapter{
1263 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1267 d_left_table->dremel_device_views(),
1268 d_right_table->dremel_device_views(),
1269 d_left_table->depths(),
1270 d_left_table->column_order(),
1271 d_left_table->null_precedence(),
1278 typename PhysicalElementComparator = sorting_physical_element_comparator>
1281 d_left_table->check_physical_element_comparator<PhysicalElementComparator>();
1282 d_right_table->check_physical_element_comparator<PhysicalElementComparator>();
1284 return less_equivalent_comparator{strong_index_comparator_adapter{
1285 device_row_comparator<has_nested_columns, Nullate, PhysicalElementComparator>{
1289 d_left_table->dremel_device_views(),
1290 d_right_table->dremel_device_views(),
1291 d_left_table->depths(),
1292 d_left_table->column_order(),
1293 d_left_table->null_precedence(),
1298 std::shared_ptr<preprocessed_table> d_left_table;
1299 std::shared_ptr<preprocessed_table> d_right_table;
1308 namespace equality {
1328 template <
typename Element>
1329 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1350 template <
typename Element, CUDF_ENABLE_IF(not std::is_
floating_po
int_v<Element>)>
1351 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1365 template <
typename Element, CUDF_ENABLE_IF(std::is_
floating_po
int_v<Element>)>
1366 __device__ constexpr
bool operator()(Element
const lhs, Element
const rhs)
const noexcept
1368 return isnan(lhs) and isnan(rhs) ? true : lhs == rhs;
1396 template <
bool has_nested_columns,
1398 typename PhysicalEqualityComparator = nan_equal_physical_equality_comparator>
1413 size_type const rhs_index)
const noexcept
1418 element_comparator{check_nulls, l, r, nulls_are_equal, comparator},
1423 return thrust::equal(thrust::seq, lhs.
begin(), lhs.
end(), rhs.
begin(), equal_elements);
1441 PhysicalEqualityComparator comparator = {}) noexcept
1444 check_nulls{check_nulls},
1445 nulls_are_equal{nulls_are_equal},
1446 comparator{comparator}
1453 class element_comparator {
1467 __device__ element_comparator(Nullate check_nulls,
1468 column_device_view lhs,
1469 column_device_view rhs,
1471 PhysicalEqualityComparator comparator = {}) noexcept
1474 check_nulls{check_nulls},
1475 nulls_are_equal{nulls_are_equal},
1476 comparator{comparator}
1488 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1489 __device__
bool operator()(
size_type const lhs_element_index,
1490 size_type const rhs_element_index)
const noexcept
1493 bool const lhs_is_null{lhs.is_null(lhs_element_index)};
1494 bool const rhs_is_null{rhs.is_null(rhs_element_index)};
1495 if (lhs_is_null and rhs_is_null) {
1496 return nulls_are_equal == null_equality::EQUAL;
1497 }
else if (lhs_is_null != rhs_is_null) {
1502 return comparator(lhs.element<Element>(lhs_element_index),
1503 rhs.element<Element>(rhs_element_index));
1506 template <
typename Element,
1507 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>() and
1508 (not has_nested_columns or not cudf::is_nested<Element>())),
1510 __device__
bool operator()(Args...)
1512 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1515 template <typename Element, CUDF_ENABLE_IF(has_nested_columns and cudf::is_nested<Element>())>
1516 __device__
bool operator()(
size_type const lhs_element_index,
1517 size_type const rhs_element_index)
const noexcept
1519 column_device_view lcol = lhs.slice(lhs_element_index, 1);
1520 column_device_view rcol = rhs.slice(rhs_element_index, 1);
1521 while (lcol.type().id() == type_id::STRUCT || lcol.type().id() == type_id::LIST) {
1523 auto lvalid = detail::make_validity_iterator<true>(lcol);
1524 auto rvalid = detail::make_validity_iterator<true>(rcol);
1525 if (nulls_are_equal == null_equality::UNEQUAL) {
1527 thrust::seq, lvalid, lvalid + lcol.size(), cuda::std::logical_not<bool>()) or
1529 thrust::seq, rvalid, rvalid + rcol.size(), cuda::std::logical_not<bool>())) {
1533 if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) {
1538 if (lcol.type().id() == type_id::STRUCT) {
1539 if (lcol.num_child_columns() == 0) {
return true; }
1541 lcol = detail::structs_column_device_view(lcol).get_sliced_child(0);
1542 rcol = detail::structs_column_device_view(rcol).get_sliced_child(0);
1543 }
else if (lcol.type().id() == type_id::LIST) {
1544 auto l_list_col = detail::lists_column_device_view(lcol);
1545 auto r_list_col = detail::lists_column_device_view(rcol);
1549 if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) {
1553 lcol = l_list_col.get_sliced_child();
1554 rcol = r_list_col.get_sliced_child();
1555 if (lcol.size() != rcol.size()) {
return false; }
1559 auto comp = column_comparator{
1560 element_comparator{check_nulls, lcol, rcol, nulls_are_equal, comparator}, lcol.size()};
1561 return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
1572 struct column_comparator {
1573 element_comparator
const comp;
1581 template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
1582 __device__
bool operator()()
const noexcept
1584 return thrust::all_of(thrust::seq,
1585 thrust::make_counting_iterator(0),
1586 thrust::make_counting_iterator(0) + size,
1587 [
this](
auto i) {
return comp.template operator()<Element>(i, i); });
1590 template <
typename Element,
1591 CUDF_ENABLE_IF(not cudf::is_equality_comparable<Element, Element>()),
1593 __device__
bool operator()(Args...) const noexcept
1595 CUDF_UNREACHABLE(
"Attempted to compare elements of uncomparable types.");
1599 column_device_view
const lhs;
1600 column_device_view
const rhs;
1601 Nullate
const check_nulls;
1603 PhysicalEqualityComparator
const comparator;
1606 table_device_view
const lhs;
1607 table_device_view
const rhs;
1608 Nullate
const check_nulls;
1610 PhysicalEqualityComparator
const comparator;
1640 friend class ::cudf::row::primitive::row_equality_comparator;
1642 template <
template <
typename>
class Hash>
1643 friend class ::cudf::row::primitive::row_hasher;
1645 using table_device_view_owner =
1649 std::vector<rmm::device_buffer>&& null_buffers,
1650 std::vector<std::unique_ptr<column>>&& tmp_columns)
1651 : _t(std::move(
table)),
1652 _null_buffers(std::move(null_buffers)),
1653 _tmp_columns(std::move(tmp_columns))
1664 table_device_view_owner _t;
1665 std::vector<rmm::device_buffer> _null_buffers;
1666 std::vector<std::unique_ptr<column>> _tmp_columns;
1726 template <
bool has_nested_columns,
1731 PhysicalEqualityComparator comparator = {})
const noexcept
1733 return device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>{
1734 nullate, *d_t, *d_t, nulls_are_equal, comparator};
1738 std::shared_ptr<preprocessed_table> d_t;
1742 template <
typename Comparator>
1743 struct strong_index_comparator_adapter {
1744 strong_index_comparator_adapter(Comparator
const& comparator) : comparator{comparator} {}
1746 __device__ constexpr
bool operator()(lhs_index_type
const lhs_index,
1747 rhs_index_type
const rhs_index)
const noexcept
1753 __device__ constexpr
bool operator()(rhs_index_type
const rhs_index,
1754 lhs_index_type
const lhs_index)
const noexcept
1756 return this->operator()(lhs_index, rhs_index);
1759 Comparator
const comparator;
1808 std::shared_ptr<preprocessed_table> right)
1809 : d_left_table{std::move(left)}, d_right_table{std::move(right)}
1843 template <
bool has_nested_columns,
1848 PhysicalEqualityComparator comparator = {})
const noexcept
1850 return strong_index_comparator_adapter{
1851 device_row_comparator<has_nested_columns, Nullate, PhysicalEqualityComparator>(
1852 nullate, *d_left_table, *d_right_table, nulls_are_equal, comparator)};
1856 std::shared_ptr<preprocessed_table> d_left_table;
1857 std::shared_ptr<preprocessed_table> d_right_table;
1873 template <
template <
typename>
class hash_function,
typename Nullate>
1885 uint32_t seed = DEFAULT_HASH_SEED,
1886 hash_value_type null_hash = cuda::std::numeric_limits<hash_value_type>::max()) noexcept
1887 : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
1899 template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
1903 if (_check_nulls && col.
is_null(row_index)) {
return _null_hash; }
1904 return hash_function<T>{_seed}(col.
element<T>(row_index));
1915 template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
1919 CUDF_UNREACHABLE(
"Unsupported type in hash.");
1936 template <
template <
typename>
class hash_function,
typename Nullate>
1950 thrust::make_transform_iterator(_table.begin(), [row_index,
this](
auto const&
column) {
1951 return cudf::type_dispatcher<dispatch_storage_type>(
1953 element_hasher_adapter<hash_function>{_check_nulls, _seed},
1959 return detail::accumulate(it, it + _table.num_columns(), _seed, [](
auto hash,
auto h) {
1960 return cudf::hashing::detail::hash_combine(hash, h);
1972 template <
template <
typename>
class hash_fn>
1973 class element_hasher_adapter {
1974 static constexpr
hash_value_type NULL_HASH = cuda::std::numeric_limits<hash_value_type>::max();
1978 __device__ element_hasher_adapter(Nullate check_nulls, uint32_t seed) noexcept
1979 : _element_hasher(check_nulls, seed), _check_nulls(check_nulls)
1983 template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
1987 return _element_hasher.template operator()<T>(col, row_index);
1990 template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
1995 column_device_view curr_col = col.
slice(row_index, 1);
1996 while (curr_col.type().id() == type_id::STRUCT || curr_col.type().id() == type_id::LIST) {
1998 auto validity_it = detail::make_validity_iterator<true>(curr_col);
1999 hash = detail::accumulate(
2000 validity_it, validity_it + curr_col.size(), hash, [](
auto hash,
auto is_valid) {
2001 return cudf::hashing::detail::hash_combine(hash,
2002 is_valid ? NON_NULL_HASH : NULL_HASH);
2005 if (curr_col.type().id() == type_id::STRUCT) {
2006 if (curr_col.num_child_columns() == 0) {
return hash; }
2008 curr_col = detail::structs_column_device_view(curr_col).get_sliced_child(0);
2009 }
else if (curr_col.type().id() == type_id::LIST) {
2010 auto list_col = detail::lists_column_device_view(curr_col);
2012 hash = detail::accumulate(
2013 list_sizes, list_sizes + list_col.size(), hash, [](
auto hash,
auto size) {
2014 return cudf::hashing::detail::hash_combine(hash, hash_fn<size_type>{}(size));
2016 curr_col = list_col.get_sliced_child();
2019 for (
int i = 0; i < curr_col.size(); ++i) {
2020 hash = cudf::hashing::detail::hash_combine(
2022 type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
2027 element_hasher<hash_fn, Nullate>
const _element_hasher;
2028 Nullate
const _check_nulls;
2032 table_device_view t,
2033 uint32_t seed = DEFAULT_HASH_SEED) noexcept
2034 : _check_nulls{check_nulls}, _table{t}, _seed(seed)
2038 Nullate
const _check_nulls;
2039 table_device_view
const _table;
2040 uint32_t
const _seed;
2078 row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
2093 template <
typename>
class hash_function = cudf::hashing::detail::default_hash,
2094 template <
template <
typename>
class,
typename>
class DeviceRowHasher =
device_row_hasher,
2097 uint32_t seed = DEFAULT_HASH_SEED)
const
2099 return DeviceRowHasher<hash_function, Nullate>(
nullate, *d_t, seed);
2103 std::shared_ptr<preprocessed_table> d_t;
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
CUDF_HOST_DEVICE column_device_view slice(size_type offset, size_type size) const noexcept
Get a new column_device_view which is a slice of this column.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
const_iterator< T > begin() const
Return an iterator to the first element of the column.
const_iterator< T > end() const
Returns an iterator to the element following the last element of the column.
A container of nullable device data as a column of elements.
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
Returns the type identifier.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child() const
Fetches the child column of the underlying list column with offset and size applied.
Given a column_device_view, an instance of this class provides a wrapper on this compound column for ...
column_device_view get_sliced_child(size_type idx) const
Fetches the child column of the underlying struct column.
Computes the equality comparison between 2 rows.
constexpr bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table is equal to the row at rhs_index in the rhs tabl...
Comparator for performing equality comparisons between two rows of the same table.
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing equality comparisons between two rows of the same table.
self_comparator(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows of the same table.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Get the comparison operator to use on the device.
An owning object that can be used to equality compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing equality comparisons between two rows from two tables.
auto equal_to(Nullate nullate={}, null_equality nulls_are_equal=null_equality::EQUAL, PhysicalEqualityComparator comparator={}) const noexcept
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, rmm::cuda_stream_view stream)
Construct an owning object for performing equality comparisons between two rows from two tables.
Computes the hash value of a row in the given table.
auto operator()(size_type row_index) const noexcept
Return the hash value of a row in the given table.
Computes the hash value of an element in the given column.
uint32_t _seed
The seed to use for hashing.
element_hasher(Nullate nulls, uint32_t seed=DEFAULT_HASH_SEED, hash_value_type null_hash=cuda::std::numeric_limits< hash_value_type >::max()) noexcept
Constructs an element_hasher object.
Nullate _check_nulls
Whether to check for nulls.
hash_value_type _null_hash
Hash value to use for null elements.
Computes the hash value of a row in the given table.
DeviceRowHasher< hash_function, Nullate > device_hasher(Nullate nullate={}, uint32_t seed=DEFAULT_HASH_SEED) const
Get the hash operator to use on the device.
row_hasher(table_view const &t, rmm::cuda_stream_view stream)
Construct an owning object for hashing the rows of a table.
row_hasher(std::shared_ptr< preprocessed_table > t)
Construct an owning object for hashing the rows of a table from an existing preprocessed_table.
Performs a relational comparison between two elements in two columns.
cuda::std::pair< weak_ordering, int > operator()(size_type lhs_element_index, size_type rhs_element_index)
Compares two list-type columns.
cuda::std::pair< weak_ordering, int > operator()(size_type const lhs_element_index, size_type const rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
cuda::std::pair< weak_ordering, int > operator()(size_type const, size_type const) const noexcept
Throws run-time error when columns types cannot be compared or if this class is instantiated with has...
element_comparator(Nullate check_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence=null_order::BEFORE, int depth=0, PhysicalElementComparator comparator={}, optional_dremel_view l_dremel_device_view={}, optional_dremel_view r_dremel_device_view={})
Construct type-dispatched function object for performing a relational comparison between two elements...
Computes the lexicographic comparison between 2 rows.
constexpr weak_ordering operator()(size_type const lhs_index, size_type const rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less,...
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables....
device_row_comparator(Nullate check_nulls, table_device_view lhs, table_device_view rhs, device_span< detail::dremel_device_view const > l_dremel_device_views, device_span< detail::dremel_device_view const > r_dremel_device_views, cuda::std::optional< device_span< int const >> depth=cuda::std::nullopt, cuda::std::optional< device_span< order const >> column_order=cuda::std::nullopt, cuda::std::optional< device_span< null_order const >> null_precedence=cuda::std::nullopt, PhysicalElementComparator comparator={}) noexcept
Construct a function object for performing a lexicographic comparison between the rows of two tables.
An owning object that can be used to lexicographically compare two rows of the same table.
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
self_comparator(table_view const &t, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between two rows of the same tab...
self_comparator(std::shared_ptr< preprocessed_table > t)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
An owning object that can be used to lexicographically compare rows of two different tables.
two_table_comparator(std::shared_ptr< preprocessed_table > left, std::shared_ptr< preprocessed_table > right)
Construct an owning object for performing a lexicographic comparison between two rows of the same pre...
auto less(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
two_table_comparator(table_view const &left, table_view const &right, host_span< order const > column_order={}, host_span< null_order const > null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct an owning object for performing a lexicographic comparison between rows of two different ta...
auto less_equivalent(Nullate nullate={}, PhysicalElementComparator comparator={}) const
Return the binary operator for comparing rows in the table.
Table device view that is usable in device memory.
A set of cudf::column_view's of the same size.
A set of cudf::column's of the same size.
Column device view class definitions.
lhs_index_type
Strongly typed index for left-hand side table rows.
cuda::std::optional< detail::dremel_device_view const > optional_dremel_view
Optional dremel device view for handling nested column structures.
rhs_index_type
Strongly typed index for right-hand side table rows.
uint32_t hash_value_type
Type of hash value.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
null_order
Indicates how null values compare against all other values.
null_equality
Enum to consider two nulls as equal or unequal.
int32_t size_type
Row index type for columns and tables.
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
weak_ordering compare_elements(Element lhs, Element rhs)
Alias for backward compatibility with legacy row operators.
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Alias for backward compatibility with legacy row operators.
cudf::detail::weak_ordering weak_ordering
Alias for backward compatibility with legacy row operators.
CUDF_HOST_DEVICE auto make_list_size_iterator(detail::lists_column_device_view const &c)
Makes an iterator that returns size of the list by row index.
DEPRECATED: This header is deprecated as of 25.10 and will be removed in 25.12.
Column APIs for sort and rank.
Device version of C++20 std::span with reduced feature set.
A map from cudf::type_id to cudf type that excludes LIST and STRUCT types.
std::conditional_t< t==type_id::STRUCT or t==type_id::LIST, void, id_to_type< t > > type
The type to dispatch to if the type is nested.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparison of non-floating point values.
Equality comparator functor that compares physical values rather than logical elements like lists,...
constexpr bool operator()(Element const lhs, Element const rhs) const noexcept
Operator for equality comparisons.
Preprocessed table for use with row equality comparison or row hashing.
static std::shared_ptr< preprocessed_table > create(table_view const &table, rmm::cuda_stream_view stream)
Factory to construct preprocessed_table for use with row equality comparison or row hashing.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS m...
less_comparator(Comparator const &comparator)
Constructs a less_comparator.
Wraps and interprets the result of device_row_comparator, true if the result is weak_ordering::LESS o...
less_equivalent_comparator(Comparator const &comparator)
Constructs a less_equivalent_comparator.
Computes a weak ordering of two values with special sorting behavior.
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparisons.
Preprocessed table for use with lexicographical comparison.
std::invoke_result_t< decltype(table_device_view::create), table_view, rmm::cuda_stream_view > table_device_view_owner
Type of table device view owner for the preprocessed table.
static std::pair< std::shared_ptr< preprocessed_table >, std::shared_ptr< preprocessed_table > > create(table_view const &lhs, table_view const &rhs, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess tables for use with lexicographical comparison.
static std::shared_ptr< preprocessed_table > create(table_view const &table, host_span< order const > column_order, host_span< null_order const > null_precedence, rmm::cuda_stream_view stream)
Preprocess table for use with lexicographical comparison.
Relational comparator functor that compares physical values rather than logical elements like lists,...
constexpr weak_ordering operator()(Element const lhs, Element const rhs) const noexcept
Operator for relational comparison of non-floating point values.
A counting iterator that uses strongly typed indices bound to tables.
thrust::iterator_adaptor< strong_index_iterator< Index >, Index > super_t
The base class.
constexpr strong_index_iterator(Underlying n)
Constructs a strong index iterator.
C++20 std::span with reduced feature set.
Indicates the presence of nulls at compile-time or runtime.
Table device view class definitions.
bool has_nested_columns(table_view const &table)
Determine if any nested columns exist in a given table.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.