Hello,
I am trying to use a pointer in a cuda fortran kernel, however the compiler won’t seem to let me. I made a simplified version of the kernel that reproduces the same errors.
The program
module cuda_kernels
use cudafor
implicit none
contains
attributes(global) subroutine process_slice(matrix, nrows, ncols, target_row)
! Kernel to process a particular row of the matrix
real, device, intent(inout), target :: matrix(nrows,ncols)
integer, value :: nrows, ncols, target_row
integer :: idx_thread
real, device, pointer :: row_ptr(:)
idx_thread = threadIdx%x + (blockIdx%x - 1) * blockDim%x
row_ptr => matrix(target_row, :)
! row_ptr(1:ncols) => matrix(target_row, :) ! this also fails
if (idx_thread < ncols) then
row_ptr(idx_thread) = row_ptr(idx_thread) + 1.0
end if
end subroutine process_slice
end module cuda_kernels
program test_cuda_pointers
use cuda_kernels
real, allocatable :: matrix_h(:,:)
real, device, allocatable :: matrix_d(:, :)
integer :: i, j
integer, parameter :: nrows = 5
integer, parameter :: ncols = 5
allocate(matrix_h(nrows, ncols))
allocate(matrix_d(nrows, ncols))
! Initialize the host matrix
do i = 1, nrows
do j = 1, ncols
matrix_h(i, j) = real((i - 1) * ncols + j)
end do
end do
! copy the matrix from host to device
matrix_d = matrix_h
print *, "Matrix before processing:"
print *, matrix_h
! Launch the kernel to increment each element of the second row by 1
call process_slice<<<1, ncols>>>(matrix_d, nrows, ncols, 2)
! Copy the data back to the host
matrix_h = matrix_d
print *, "Matrix after processing the second row:"
print *, matrix_h
! Deallocate the device matrix
deallocate(matrix_d)
deallocate(matrix_h)
end program test_cuda_pointers
produces the following errors based on if row_ptr => matrix(target_row, :)
or row_ptr(1:ncols) => matrix(target_row, :)
is used.
The errors are
> nvfortran -O0 -cuda -gpu=cc61,debug, -g -o test_cuda_pointers test_cuda_pointers.f90
NVFORTRAN-F-0155-Compiler failed to translate accelerator region (see -Minfo messages): Unsupported procedure (test_cuda_pointers.f90: 5)
NVFORTRAN/x86-64 Linux 24.7-0: compilation aborted
or
> nvfortran -O0 -cuda -gpu=cc61,debug, -g -o test_cuda_pointers test_cuda_pointers.f90
NVFORTRAN-S-0155-Call to compiler runtime library function not supported. pghpf_ptr_assn_i8 (test_cuda_pointers.f90: 5)
Unimplemented opcode: 0
NVFORTRAN-F-0000-Internal compiler error. Unimplemented opcode. 4 (test_cuda_pointers.f90: 5)
NVFORTRAN/x86-64 Linux 24.7-0: compilation aborted
I have looked in the documentation but have not found a place where it says that I can not do this with pointers, but perhaps I just missed it. I tried adding -MINFO=all
, but that didn’t add anymore information to the output.
I will try this with the newest set of compilers when I have a chance.
Thank you for your help,
Josh