Hello,

I have some code that produces either an internal compiler error (ICE) or errors in the generated .gpu file (which I assume is something like object code for the GPU). The problem has to do with passing a character variable as an argument to another device routine. If I instead pass the character constant directly (see the commented-out call to sgemv in the code below), I get an ICE: "unexpected address of constant". Otherwise, I get errors in the .gpu file like this:

/tmp/pgcudafor25EgI8rjAavK.gpu(43): error: expected an expression

/tmp/pgcudafor25EgI8rjAavK.gpu(44): error: expected an expression

/tmp/pgcudafor25EgI8rjAavK.gpu(45): error: expected an expression

/tmp/pgcudafor25EgI8rjAavK.gpu(46): error: too many arguments in function call

…

My suspicion is it is an error in the compiler. I’m using version 10.6.

Here’s the offending code:

MODULE imsim_module

use cudafor

contains

!> Host-side launcher for `kernel`.
!>
!> Copies the two pixel-width scalars into device variables, builds a
!> launch configuration of 16x16-thread blocks, and launches `kernel`.
!>
!> pixxwidth, pixywidth : host values handed to the kernel.
subroutine kernel_setup(pixxwidth, pixywidth)
  real :: pixxwidth, pixywidth
  type(dim3) :: blocks, threads
  real, device :: d_pixxwidth, d_pixywidth

  ! Each block is 16 x 16 x 1 threads; the grid holds one block per
  ! 16x16 tile (integer division, so any remainder is not covered).
  ! NOTE(review): detxsize and detysize are not declared in this routine
  ! or in the visible part of the module, so under implicit typing they
  ! are local integers that are never assigned -- confirm where they are
  ! meant to come from.
  threads = dim3(16, 16, 1)
  blocks = dim3(detxsize/16, detysize/16, 1)

  ! Assigning to variables declared with the `device` attribute stages
  ! the host values in GPU memory before the launch.
  d_pixxwidth = pixxwidth
  d_pixywidth = pixywidth

  call kernel<<<blocks, threads>>>(d_pixxwidth, d_pixywidth)
end subroutine kernel_setup

!> Device-side test for equality of two single characters.
!>
!> Returns .true. when CA and CB are the same character. The comparison is
!> exact, so 'n' and 'N' are treated as different.
!>
!> CA, CB : the characters to compare.
attributes(device) logical function MYLSAME(CA, CB)
  character(len=1), intent(in) :: CA, CB

  MYLSAME = (CA == CB)
! A module procedure is closed with a full END FUNCTION statement; the bare
! END form is only accepted from Fortran 2008 onwards.
end function MYLSAME

!> Device-side single-precision matrix-vector product, following the
!> reference BLAS SGEMV algorithm:
!>
!>   y := alpha*A*x    + beta*y    when TRANS equals 'N'
!>   y := alpha*A**T*x + beta*y    when TRANS equals 'T' or 'C'
!>
!> Arguments
!>   TRANS : operation selector, compared against 'N', 'T', 'C' with MYLSAME.
!>   M, N  : number of rows and columns of A.
!>   ALPHA : scalar multiplying the matrix-vector product.
!>   A     : matrix, declared A(LDA,*); rows 1..M and columns 1..N are read.
!>   LDA   : leading dimension of A; must be at least max(1, M).
!>   X     : input vector, accessed with stride INCX.
!>   INCX  : stride through X; must be non-zero.
!>   BETA  : scalar multiplying the incoming Y.
!>   Y     : vector updated in place, accessed with stride INCY.
!>   INCY  : stride through Y; must be non-zero.
!>
!> If an argument fails validation the routine returns without touching Y.
!> The failing argument position is recorded only in the local INFO, so the
!> caller gets no error report.
attributes(device) subroutine SGEMV(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
  character(len=1), intent(in) :: TRANS
  integer, intent(in) :: M, N, LDA, INCX, INCY
  real, intent(in) :: ALPHA, BETA
  real, intent(in) :: A(LDA, *), X(*)
  real, intent(inout) :: Y(*)

  real, parameter :: ONE = 1.0E+0, ZERO = 0.0E+0
  real :: TEMP
  integer :: I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
  character(len=1) :: NN, TT, CC
  intrinsic :: max

  ! MYLSAME is reached by host association from the enclosing module. It is
  ! deliberately not re-declared here: a local type declaration would hide
  ! the module function and make the reference an external one.

  ! The selector letters are held in variables rather than passed as
  ! literals; passing a character constant to a device routine was reported
  ! to trigger an internal compiler error with this toolchain.
  NN = 'N'
  TT = 'T'
  CC = 'C'

  ! Validate the arguments. INFO holds the position of the first bad one.
  INFO = 0
  if (.not. MYLSAME(TRANS, NN) .and. .not. MYLSAME(TRANS, TT) .and. &
      .not. MYLSAME(TRANS, CC)) then
    INFO = 1
  else if (M < 0) then
    INFO = 2
  else if (N < 0) then
    INFO = 3
  else if (LDA < max(1, M)) then
    INFO = 6
  else if (INCX == 0) then
    INFO = 8
  else if (INCY == 0) then
    INFO = 11
  end if
  if (INFO /= 0) return

  ! Quick return when there is nothing to compute.
  if ((M == 0) .or. (N == 0) .or. ((ALPHA == ZERO) .and. (BETA == ONE))) return

  ! Logical lengths of x and y depend on whether A is used transposed.
  if (MYLSAME(TRANS, NN)) then
    LENX = N
    LENY = M
  else
    LENX = M
    LENY = N
  end if

  ! Starting indices: a negative stride walks the vector from its far end.
  if (INCX > 0) then
    KX = 1
  else
    KX = 1 - (LENX - 1)*INCX
  end if
  if (INCY > 0) then
    KY = 1
  else
    KY = 1 - (LENY - 1)*INCY
  end if

  ! First form y := beta*y. BETA == ZERO assigns zero outright instead of
  ! multiplying, so the previous contents of Y are never read in that case.
  if (BETA /= ONE) then
    if (INCY == 1) then
      if (BETA == ZERO) then
        do I = 1, LENY
          Y(I) = ZERO
        end do
      else
        do I = 1, LENY
          Y(I) = BETA*Y(I)
        end do
      end if
    else
      IY = KY
      if (BETA == ZERO) then
        do I = 1, LENY
          Y(IY) = ZERO
          IY = IY + INCY
        end do
      else
        do I = 1, LENY
          Y(IY) = BETA*Y(IY)
          IY = IY + INCY
        end do
      end if
    end if
  end if

  if (ALPHA == ZERO) return

  if (MYLSAME(TRANS, NN)) then
    ! Form y := alpha*A*x + y, one column of A at a time.
    JX = KX
    if (INCY == 1) then
      do J = 1, N
        if (X(JX) /= ZERO) then
          TEMP = ALPHA*X(JX)
          do I = 1, M
            Y(I) = Y(I) + TEMP*A(I, J)
          end do
        end if
        JX = JX + INCX
      end do
    else
      do J = 1, N
        if (X(JX) /= ZERO) then
          TEMP = ALPHA*X(JX)
          IY = KY
          do I = 1, M
            Y(IY) = Y(IY) + TEMP*A(I, J)
            IY = IY + INCY
          end do
        end if
        JX = JX + INCX
      end do
    end if
  else
    ! Form y := alpha*A**T*x + y: each y element is a dot product of a
    ! column of A with x.
    JY = KY
    if (INCX == 1) then
      do J = 1, N
        TEMP = ZERO
        do I = 1, M
          TEMP = TEMP + A(I, J)*X(I)
        end do
        Y(JY) = Y(JY) + ALPHA*TEMP
        JY = JY + INCY
      end do
    else
      do J = 1, N
        TEMP = ZERO
        IX = KX
        do I = 1, M
          TEMP = TEMP + A(I, J)*X(IX)
          IX = IX + INCX
        end do
        Y(JY) = Y(JY) + ALPHA*TEMP
        JY = JY + INCY
      end do
    end if
  end if

  return
end subroutine SGEMV

!> Minimal GPU kernel that calls the device routine sgemv with a character
!> argument on a 4x2 matrix.
!>
!> pixxwidth, pixywidth : accepted from the launcher but not used in the body.
attributes(global) subroutine kernel(pixxwidth, pixywidth)
  real :: pixxwidth, pixywidth
  character, device :: TT
  real, device :: A(4,2), X(2), Y(2)
  real, device :: alpha, beta

  alpha = 1.0
  beta = 1.0
  ! Character literals need plain ASCII apostrophes as delimiters.
  TT = 'T'

  ! NOTE(review): A, X and Y are never assigned before the call, so the
  ! numerical result is undefined -- presumably acceptable because this is
  ! only a compile-time reproducer; confirm before reusing.

  ! Passing the literal directly was reported to give an internal compiler
  ! error ("unexpected address of constant"), so the variable TT is used:
  ! call sgemv('T', 4, 2, alpha, A, 4, X, 1, beta, Y, 1 )
  call sgemv(TT, 4, 2, alpha, A, 4, X, 1, beta, Y, 1)
end subroutine kernel

END MODULE imsim_module

On another note, I’ve been porting some F77 code to CUDA Fortran. The code uses BLAS/LAPACK/other netlib routines that rely on optimized machine-constant routines (e.g., r1mach.f, d1mach.f, etc. from netlib). Is there existing CUDA Fortran code that provides these machine constants for the GPU architecture, so that I can optimize my routines for it?

Thanks.