Most RAPIDS requests are about Python, but the C++ interface is the foundation of the Python implementation. This sample code is meant to provide building blocks for the code you have already presented, and it uses generic `auto` typing to skip past the implementation details of varying column data types. Since your question was mainly about setting column dtypes, please look over this issue, which lists all the possible types: https://github.com/rapidsai/cudf/issues/1119 . For character strings, see the more detailed libcudf developer guide and its string_scalar topic: https://docs.rapids.ai/api/libcudf/stable/developer_guide

|
Use traits to set gdf_data elements and other typedefs · Issue #1119 · rapidsai/cudf
Once #892 is merged and after we move away from cffi in #599 , update the union gdf_data to use traits defined members. The code to be changed: typedef union { int8_t si08; /**< GDF_INT8 */ int16_t…
github.com
|
#include <cudf/column/column_factories.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/io/functions.hpp>
#include <cudf/types.hpp>

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
// Reads the CSV file `filename` through cudf::io::read_csv and returns a map
// from column name to that column's values as a std::vector<gdf_scalar>.
//
// Parameters:
//   filename     - path handed to cudf::io::source_info.
//   column_types - column name -> requested gdf_dtype; each entry is forwarded
//                  to the reader options before the file is read.
//
// Returns: an unordered_map keyed by column name.
//
// NOTE(review): gdf_dtype / gdf_scalar look like legacy cuDF C types while
// cudf::io::csv_reader_options belongs to the newer libcudf API — confirm that
// both are available in the libcudf version being targeted.
std::unordered_map<std::string, std::vector<gdf_scalar>> read_csv_with_types(const std::string& filename, const std::unordered_map<std::string, gdf_dtype>& column_types)
{
// Result container, filled one column at a time below
std::unordered_map<std::string, std::vector<gdf_scalar>> data;
// Build the CSV reader options for the given file
cudf::io::csv_reader_options options = cudf::io::csv_reader_options::builder(cudf::io::source_info{filename});
// Forward every requested (name, dtype) pair to the reader options.
// Iteration order over an unordered_map is unspecified.
// NOTE(review): set_dtypes is invoked once per entry; if each call replaces the
// previously set dtypes (as a setter normally would), only the last entry
// survives — confirm, and check the accepted argument types against the
// csv_reader_options documentation.
for (const auto& column_type : column_types)
{
options.set_dtypes({column_type.first}, {column_type.second});
}
auto result = cudf::io::read_csv(options);
// Iterate over each column in the result table
// NOTE(review): in libcudf, table_with_metadata::tbl appears to be a
// std::unique_ptr<cudf::table>; if so it cannot be copied like this and must be
// dereferenced with -> — verify.
auto table = result.tbl;
for (cudf::size_type i = 0; i < table.num_columns(); ++i)
{
const auto& column = table.get_column(i);
// NOTE(review): presumably column names are reported through result.metadata
// rather than on the column object — confirm that name() exists here.
const auto& column_name = column->name();
// NOTE(review): cuDF column data presumably resides in GPU memory; building a
// host std::vector from these iterators likely needs an explicit
// device-to-host copy — confirm. Also verify that gdf_scalar is a valid
// element type for begin<T>().
const auto& column_data = static_cast<const cudf::column_view&>(*column).begin<gdf_scalar>();
// Store the column data in the unordered map
data[column_name] = std::vector<gdf_scalar>(column_data, column_data + column->size());
}
return data;
}
/**
 * @brief Example driver for read_csv_with_types.
 *
 * Requests explicit types for three placeholder columns of "your_file.csv",
 * reads the file, and prints a header line for every returned column. The
 * file name and the column names/types are placeholders to be replaced with
 * real values.
 *
 * @return 0 on completion.
 */
int main()
{
    // Specify the filename of the CSV file
    std::string filename = "your_file.csv";

    // Specify the data types of each column
    std::unordered_map<std::string, gdf_dtype> column_types;
    column_types["column1"] = GDF_INT32;
    column_types["column2"] = GDF_FLOAT64;
    column_types["column3"] = GDF_STRING;

    // Call the CSV file reader function
    std::unordered_map<std::string, std::vector<gdf_scalar>> data = read_csv_with_types(filename, column_types);

    // Access the extracted data
    for (const auto& column_data : data)
    {
        const std::string& column_name = column_data.first;
        const std::vector<gdf_scalar>& column_values = column_data.second;

        std::cout << "Column '" << column_name << "' data:" << '\n';

        for (const auto& scalar : column_values)
        {
            // Placeholder: access the scalar value using scalar.is_valid and
            // scalar.data, then process it as needed.
            (void)scalar;  // silences the unused-variable warning until filled in
        }
    }
    return 0;
}