key_remapping.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/column/column.hpp>
10 #include <cudf/types.hpp>
12 #include <cudf/utilities/export.hpp>
14 
15 #include <rmm/cuda_stream_view.hpp>
16 
17 #include <memory>
18 
19 namespace CUDF_EXPORT cudf {
20 
30 enum class compute_metrics : bool { NO = false, YES = true };  // Controls whether key remapping metrics are computed.
31 
32 namespace detail {
36 class key_remapping_impl;  // Forward declaration only; aliased as key_remapping::impl_type below.
37 } // namespace detail
38 
47 
56 
62 [[deprecated("Use KEY_REMAP_RIGHT_NULL instead.")]] constexpr size_type KEY_REMAP_BUILD_NULL =
64 
80  public:
81  key_remapping() = delete;  // Default construction is disabled.
82  ~key_remapping();  // Declared here; the definition is not visible in this listing.
83  key_remapping(key_remapping const&) = delete;  // Not copy-constructible.
84  key_remapping(key_remapping&&) = delete;  // Not move-constructible.
85  key_remapping& operator=(key_remapping const&) = delete;  // Not copy-assignable.
86  key_remapping& operator=(key_remapping&&) = delete;  // Not move-assignable.
87 
105  null_equality compare_nulls = null_equality::EQUAL,
106  cudf::compute_metrics metrics = cudf::compute_metrics::YES,
108 
124  [[nodiscard]] std::unique_ptr<cudf::column> remap_right_keys(
127 
138  [[deprecated("Use remap_right_keys instead.")]] [[nodiscard]] std::unique_ptr<cudf::column>
142  {
143  return remap_right_keys(stream, mr);
144  }
145 
163  [[nodiscard]] std::unique_ptr<cudf::column> remap_left_keys(
164  cudf::table_view const& keys,
167 
179  [[deprecated("Use remap_left_keys instead.")]] [[nodiscard]] std::unique_ptr<cudf::column>
181  cudf::table_view const& keys,
184  {
185  return remap_left_keys(keys, stream, mr);
186  }
187 
193  [[nodiscard]] bool has_metrics() const;  // Whether metrics (distinct_count, max_duplicate_count) were computed.
194 
202  [[nodiscard]] size_type get_distinct_count() const;  // Number of distinct keys in the right table.
203 
211  [[nodiscard]] size_type get_max_duplicate_count() const;  // Maximum number of times any single key appears.
212 
213  private:
214  using impl_type = cudf::detail::key_remapping_impl;  // Implementation class, forward-declared in cudf::detail.
215 
216  std::unique_ptr<impl_type> _impl;  // Owning pointer to the implementation object.
217 };
218  // end of group
220 
221 } // namespace CUDF_EXPORT cudf
Remaps keys to unique integer IDs.
std::unique_ptr< cudf::column > remap_probe_keys(cudf::table_view const &keys, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Deprecated alias for remap_left_keys().
bool has_metrics() const
Check if metrics (distinct_count, max_duplicate_count) were computed.
key_remapping(cudf::table_view const &right, null_equality compare_nulls=null_equality::EQUAL, cudf::compute_metrics metrics=cudf::compute_metrics::YES, rmm::cuda_stream_view stream=cudf::get_default_stream())
Constructs a key remapping structure from the given right keys.
size_type get_max_duplicate_count() const
Get the maximum number of times any single key appears.
std::unique_ptr< cudf::column > remap_build_keys(rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Deprecated alias for remap_right_keys().
std::unique_ptr< cudf::column > remap_left_keys(cudf::table_view const &keys, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Remap left keys to integer IDs.
std::unique_ptr< cudf::column > remap_right_keys(rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Remap right keys to integer IDs.
size_type get_distinct_count() const
Get the number of distinct keys in the right table.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
Class definition for cudf::column.
constexpr size_type KEY_REMAP_NOT_FOUND
Sentinel value for left-side keys not found in the right table.
constexpr size_type KEY_REMAP_RIGHT_NULL
Sentinel value for right-side rows with null keys (when nulls are not equal).
constexpr size_type KEY_REMAP_BUILD_NULL
Deprecated alias for KEY_REMAP_RIGHT_NULL.
compute_metrics
Enum to control whether key remapping metrics should be computed.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:141
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:85
cuDF interfaces
Definition: host_udf.hpp:26
Class definitions for (mutable)_table_view
Type declarations for libcudf.