The size of the allocatable arrays in device subroutines

Hi,

I’m still testing the allocate statements in device subroutines and I come across a new problem.

This is my test code:

MODULE mod_kernel

  USE cudafor

CONTAINS


  ATTRIBUTES(GLOBAL) SUBROUTINE kernel(x,n)

    IMPLICIT NONE

    INTEGER,VALUE,INTENT(IN) :: n
    INTEGER,DEVICE,INTENT(OUT) :: x(n)
    INTEGER,DEVICE,ALLOCATABLE :: A(:)

    INTEGER :: tid, STATUS

    tid = threadidx%x

    ALLOCATE(A(13317),STAT=STATUS)

    WRITE(*,*) STATUS

    IF (tid<=n) THEN
       A = tid
       x(tid) = SUM(A)
    END IF
    
    CALL SYNCTHREADS()

    DEALLOCATE(A)

    RETURN

  END SUBROUTINE kernel


END MODULE mod_kernel




PROGRAM test
  
  USE cudafor
  USE mod_kernel

  IMPLICIT NONE

  INTEGER,PARAMETER :: n=100
  INTEGER :: x(n)
  INTEGER,ALLOCATABLE,DEVICE :: dev_x(:)
  

  x = 1
  ALLOCATE(dev_x(n))
  
  CALL kernel<<<1,n>>>(dev_x,n)

  x = dev_x
  DEALLOCATE(dev_x)

  WRITE(*,*) x
  
END PROGRAM test

I find the maximum size of the allocatable array A in the device subroutine kernel of the module mod_kernel is 13316. When a larger number is adopted, errors occur.

Error Message:

NVRM: Xid (0000:04:00): 31 Ch00000002, engmask 00000101, intr 10000000

0: copyout Memcpy (host=0x6b8600, dev=0xb01600000, size=400) FAILED: 4(unspecified launch failure)

From the value of the variable STATUS, we can figure out that some of the threads allocate memory successfully while others not.

Why does this occur?
Can I change the maximum size of the allocatable array to be larger with altering the compiler opition?

Hi OceanCloud,

Looks like you’re going beyond the max heap size. To raise this, call “cudaDeviceSetLimit”.

  • Mat
% cat test.f90
MODULE mod_kernel

   USE cudafor

 CONTAINS


   ATTRIBUTES(GLOBAL) SUBROUTINE kernel(x,n)

     IMPLICIT NONE

     INTEGER,VALUE,INTENT(IN) :: n
     INTEGER,DEVICE,INTENT(OUT) :: x(n)
     INTEGER,DEVICE,ALLOCATABLE :: A(:)

     INTEGER :: tid, STATUS

     tid = threadidx%x

     ALLOCATE(A(13317),STAT=STATUS)

!     WRITE(*,*) STATUS

     IF (tid<=n) THEN
        A = tid
        x(tid) = SUM(A)
     END IF

     CALL SYNCTHREADS()

     DEALLOCATE(A)

     RETURN

   END SUBROUTINE kernel


 END MODULE mod_kernel




 PROGRAM test

   USE cudafor
   USE mod_kernel

   IMPLICIT NONE

   INTEGER,PARAMETER :: n=100
   INTEGER :: x(n)
   INTEGER,ALLOCATABLE,DEVICE :: dev_x(:)
   integer(kind=cuda_count_kind) :: val
   integer :: rc

   rc= cudaDeviceGetLimit( val, cudaLimitMallocHeapSize )
   print *, "Max Heap Size: ", val
   val = val*2
   rc= cudaDeviceSetLimit( cudaLimitMallocHeapSize, val )
   rc= cudaDeviceGetLimit( val, cudaLimitMallocHeapSize )
   print *, "New Heap Size: ", val

   x = 1
   ALLOCATE(dev_x(n))

   CALL kernel<<<1,n>>>(dev_x,n)

   x = dev_x
   DEALLOCATE(dev_x)

   WRITE(*,*) x

 END PROGRAM test
% pgf90 -Mcuda=rdc,cuda5.5,cc35 test.f90 ; a.out
 Max Heap Size:                   8388608
 New Heap Size:                  16777216
        13317        26634        39951        53268        66585        79902
        93219       106536       119853       133170       146487       159804
       173121       186438       199755       213072       226389       239706
       253023       266340       279657       292974       306291       319608
       332925       346242       359559       372876       386193       399510
       412827       426144       439461       452778       466095       479412
       492729       506046       519363       532680       545997       559314
       572631       585948       599265       612582       625899       639216
       652533       665850       679167       692484       705801       719118
       732435       745752       759069       772386       785703       799020
       812337       825654       838971       852288       865605       878922
       892239       905556       918873       932190       945507       958824
       972141       985458       998775      1012092      1025409      1038726
      1052043      1065360      1078677      1091994      1105311      1118628
      1131945      1145262      1158579      1171896      1185213      1198530
      1211847      1225164      1238481      1251798      1265115      1278432
      1291749      1305066      1318383      1331700