parquet_io_utils.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/io/datasource.hpp>
10 
11 #include <rmm/cuda_stream_view.hpp>
12 #include <rmm/device_buffer.hpp>
13 #include <rmm/resource_ref.hpp>
14 
15 #include <cstddef>
16 #include <functional>
17 #include <future>
18 #include <tuple>
19 #include <vector>
20 
26 namespace CUDF_EXPORT cudf {
27 namespace io::parquet {
28 
37 
/**
 * @brief Returns the Parquet reader's footer speculative read size in bytes.
 *
 * @return Speculative footer read size, in bytes
 */
55 [[nodiscard]] std::size_t metadata_size_hint();
56 
65 [[nodiscard]] std::unique_ptr<cudf::io::datasource::buffer> fetch_footer_to_host(
66  cudf::io::datasource& datasource);
67 
/**
 * @brief Fetches host buffers of Parquet footer bytes from multiple input data sources.
 *
 * @param datasources Span of references to the input data sources
 * @return Vector of owning pointers to host buffers holding the footer bytes
 *         (presumably one buffer per data source, in input order -- TODO confirm
 *         against the implementation)
 */
79 [[nodiscard]] std::vector<std::unique_ptr<cudf::io::datasource::buffer>> fetch_footers_to_host(
80  cudf::host_span<std::reference_wrapper<cudf::io::datasource> const> datasources);
81 
/**
 * @brief Fetches a host buffer of Parquet page index from the input data source.
 *
 * @param datasource Input data source to read from
 * @param page_index_bytes Byte range (offset and size) of the page index
 * @return Owning pointer to a host buffer holding the page index bytes
 */
91 [[nodiscard]] std::unique_ptr<cudf::io::datasource::buffer> fetch_page_index_to_host(
92  cudf::io::datasource& datasource, byte_range_info const page_index_bytes);
93 
/**
 * @brief Fetches host buffers of Parquet page index bytes from multiple input data sources.
 *
 * @param datasources Span of references to the input data sources
 * @param page_index_bytes_per_source Span of page index byte ranges (offset and size);
 *        presumably one entry per data source, matching `datasources` by position --
 *        TODO confirm against the implementation
 * @return Vector of owning pointers to host buffers holding the page index bytes
 */
107 [[nodiscard]] std::vector<std::unique_ptr<cudf::io::datasource::buffer>> fetch_page_indexes_to_host(
108  cudf::host_span<std::reference_wrapper<cudf::io::datasource> const> datasources,
109  cudf::host_span<byte_range_info const> page_index_bytes_per_source);
110 
124 std::tuple<std::vector<rmm::device_buffer>,
125  std::vector<cudf::device_span<uint8_t const>>,
126  std::future<void>>
129  rmm::cuda_stream_view stream,
131 
145 std::tuple<std::vector<rmm::device_buffer>,
146  std::vector<std::vector<cudf::device_span<uint8_t const>>>,
147  std::future<void>>
148 fetch_byte_ranges_to_device_async(
149  cudf::host_span<std::reference_wrapper<cudf::io::datasource> const> datasources,
150  cudf::host_span<std::vector<byte_range_info> const> byte_ranges_per_source,
151  rmm::cuda_stream_view stream,
152  rmm::device_async_resource_ref mr);
153  // end of group
155 } // namespace io::parquet
156 } // namespace CUDF_EXPORT cudf
Interface class for providing input data to the readers.
Definition: datasource.hpp:31
stores offset and size used to indicate a byte range
std::unique_ptr< cudf::io::datasource::buffer > fetch_page_index_to_host(cudf::io::datasource &datasource, byte_range_info const page_index_bytes)
Fetches a host buffer of Parquet page index from the input data source.
std::size_t metadata_size_hint()
Returns the Parquet reader's footer speculative read size in bytes.
std::tuple< std::vector< rmm::device_buffer >, std::vector< std::vector< cudf::device_span< uint8_t const > > >, std::future< void > > fetch_byte_ranges_to_device_async(cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources, cudf::host_span< std::vector< byte_range_info > const > byte_ranges_per_source, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
Fetches lists of byte ranges from multiple datasources into device buffers.
std::unique_ptr< cudf::io::datasource::buffer > fetch_footer_to_host(cudf::io::datasource &datasource)
Fetches a host buffer of Parquet footer bytes from the input data source.
std::vector< std::unique_ptr< cudf::io::datasource::buffer > > fetch_page_indexes_to_host(cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources, cudf::host_span< byte_range_info const > page_index_bytes_per_source)
Fetches host buffers of Parquet page index bytes from multiple input data sources.
std::vector< std::unique_ptr< cudf::io::datasource::buffer > > fetch_footers_to_host(cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources)
Fetches host buffers of Parquet footer bytes from multiple input data sources.
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref
cuDF interfaces
Definition: host_udf.hpp:26
C++20 std::span with reduced feature set.
Definition: span.hpp:184