I have the following CUDA code:
#include <fstream>
#include <iostream>
#include <vector>
#include <cstring>
#include <fcntl.h>
#include <sys/types.h>
#include <errno.h>
#include <unistd.h>
#include <thrust/device_vector.h>
#include <cufile.h>
int main(){
{
std::ofstream fout("temp.bin", std::ios::binary);
for(std::size_t i = 0; i < 4100096; ++i){
fout.write("\0", 1);
}
}
int exit_status = 1;
CUfileDescr_t desc{};
CUfileHandle_t handle{};
thrust::device_vector<unsigned char> data(8192, 0);
std::vector<CUfileIOParams_t> io_params;
CUfileBatchHandle_t batch_handle;
std::vector<CUfileIOEvents_t> io_events(2);
unsigned total_completed = 0;
int fd = ::open("temp.bin", O_RDONLY | O_DIRECT);
if(fd == -1){
auto err = errno;
std::cerr << "::open errno=" << err << std::endl;
return 1;
}
{
CUfileError_t status = ::cuFileDriverOpen();
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileDriverOpen status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
goto do_driver_close;
}
}
desc.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD;
desc.handle.fd = fd;
{
CUfileError_t status = ::cuFileHandleRegister(&handle, &desc);
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileHandleRegister status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
goto do_close;
}
}
{
CUfileError_t status = ::cuFileBufRegister(static_cast<const void *>(thrust::raw_pointer_cast(data.data())), 8192, 0);
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileBufRegister status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
goto do_file_deregister;
}
}
io_params.push_back({
.mode = cufileBatchMode::CUFILE_BATCH,
.u = {
.batch = {
.devPtr_base = static_cast<void *>(thrust::raw_pointer_cast(data.data())),
.file_offset = 3948544,
.devPtr_offset = 0,
.size = 4096
}
},
.fh = handle,
.opcode = CUfileOpcode::CUFILE_READ,
.cookie = nullptr
});
io_params.push_back({
.mode = cufileBatchMode::CUFILE_BATCH,
.u = {
.batch = {
.devPtr_base = static_cast<void *>(thrust::raw_pointer_cast(data.data())),
.file_offset = 929792,
.devPtr_offset = 4096,
.size = 4096
}
},
.fh = handle,
.opcode = CUfileOpcode::CUFILE_READ,
.cookie = nullptr
});
for(auto &io_param : io_params){
io_param.cookie = static_cast<void *>(&io_param);
}
{
CUfileError_t status = ::cuFileBatchIOSetUp(&batch_handle, io_params.size());
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileBatchIOSetUp status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
goto do_buf_deregister;
}
}
{
CUfileError_t status = ::cuFileBatchIOSubmit(batch_handle, io_params.size(), io_params.data(), 0);
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileBatchIOSubmit status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
goto do_batch_destroy;
}
}
(void)std::memset(static_cast<void *>(io_events.data()), 0, io_events.size() * sizeof(CUfileIOEvents_t));
while(total_completed < io_params.size()){
unsigned nr = io_params.size();
CUfileError_t status = ::cuFileBatchIOGetStatus(batch_handle, io_params.size(), &nr, io_events.data(), nullptr);
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileBatchIOGetStatus status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
goto do_batch_destroy;
}
total_completed += nr;
}
exit_status = 0;
do_batch_destroy:
::cuFileBatchIODestroy(batch_handle);
do_buf_deregister:
{
CUfileError_t status = ::cuFileBufDeregister(static_cast<const void *>(thrust::raw_pointer_cast(data.data())));
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileBufDeregister status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
}
}
do_file_deregister:
::cuFileHandleDeregister(handle);
do_driver_close:
{
CUfileError_t status = ::cuFileDriverClose();
if(status.err != CU_FILE_SUCCESS){
std::cerr << "::cuFileDriverClose status={.err=" << status.err << ", .cu_err=" << status.cu_err << "}" << std::endl;
}
}
do_close:
{
int ret = ::close(fd);
if(ret == -1){
auto err = errno;
std::cerr << "::close errno=" << err << std::endl;
}
}
return exit_status;
}
I compile it with the command `nvcc -std=c++17 -g -o temp1_cu temp1.cu /usr/local/cuda-11.7/targets/x86_64-linux/lib/libcufile.so`.
I would expect the program to run and exit normally when I execute it. However, what I got was the following output:
::cuFileBatchIOSubmit status={.err=5022, .cu_err=0}
The generated `cufile.log` contains the following content:
19-05-2023 18:52:43:699 [pid=4194 tid=4194] WARN 0:172 failed to open /proc/driver/nvidia-fs/devcount error: No such file or directory
19-05-2023 18:52:43:699 [pid=4194 tid=4194] NOTICE cufio-drv:705 running in compatible mode
19-05-2023 18:52:43:799 [pid=4194 tid=4194] ERROR cufio_batch:218 Non-registered case, not yet implemented
19-05-2023 18:52:43:799 [pid=4194 tid=4194] ERROR cufio_batch:352 Error while submitting IO events 0 errno: 0
19-05-2023 18:52:43:799 [pid=4194 tid=4194] ERROR 0:258 Batch Ctx state transition failed in cuFileBatchGetContextFromID
19-05-2023 18:52:43:799 [pid=4194 tid=4194] ERROR cufio_batch:597 Batch state is not in IDLE state, try destroy later
My question is: what kind of input to `::cuFileBatchIOSubmit` would trigger such an error? I read the GDS API documentation but could not find any clues.
My OS is RHEL 8 with CUDA 11.7 installed through the `.run` file, along with NVIDIA driver 515.65.01 and GDS 1.3.1. My host compiler is GCC 11, provided by the `gcc-toolset-11` system package. I don't have any GDS-related environment variables set, and no `/etc/cufile.json` exists in my environment.