Inconsistent results when "SELECTCASE" is used

Dear Sir,

I am new to the PGI Accelerator. Could anyone tell me whether the PGI Accelerator supports “SELECTCASE” within an “!$acc region”? I have found that sometimes it works, but sometimes the results are not correct.

When I disable the lines that compute Cof5(J,L) in the code, it works well. But when I enable them, the GPU and CPU results become inconsistent.

My PC setting:
PGI 10.0
CUDA 2.2
Display card: FX4600

I compile the code in this way:
pgfortran -o test test.for -ta=nvidia,time -Minfo=accel

Could anyone help me?

Thanks!

Kelvin

c-----Main Program (22 Dec 2009)
c     Reproducer for inconsistent results from SELECT CASE inside a
c     PGI Accelerator "!$acc region".  The same SELECT CASE kernel
c     is run once inside the accelerator region (result in dummy1)
c     and once as plain host code (result in dummy2); every element
c     where the two results disagree is printed.
c
c     The Cof5 formula of every case is kept commented out.  To
c     enable it, remove the leading "c" from both lines of each
c     formula, in the accelerator section AND in the host section.
      program test
      use accel_lib
      implicit none
      integer,parameter :: N=600,M=4
c     NCASE selector values, each covering NPER consecutive rows.
      integer,parameter :: NCASE=6,NPER=N/NCASE
c     Relative tolerance for the accelerator-vs-host comparison.
      real,parameter :: TOL=1.0e-5
      integer J,L,NBAD,CaseNos(N,M)
      real Cof1(N,M),Cof2(N,M),Cof3(N,M),Cof4(N,M),Cof5(N,M)
      real dummy1(N,M),dummy2(N,M)
      real diff,ref

c     ***** Coefficients *****
c     Rows 1-100 select case 1, rows 101-200 case 2, ..., rows
c     501-600 case 6: integer division groups the rows in blocks
c     of NPER.
      do L=1,M
        do J=1,N
          CaseNos(J,L)=(J-1)/NPER+1
        enddo
      enddo
c     Cof5 is read by every case below but is only assigned by the
c     commented-out formulas, so give it a defined value first.
c     Without this both results depend on uninitialized memory.
      Cof5=0.0
c     ***** Coefficients *****

c     ***** GPU *****
!$acc  region
      do J=1,N
        do L=1,M
          select case (CaseNos(J,L))
          case (1)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)+(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (2)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)-(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (3)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)+(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (4)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)-(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (5)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)+(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (6)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)-(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          end select
          dummy1(J,L)=Cof1(J,L)+Cof2(J,L)+Cof3(J,L)
     &               +Cof4(J,L)+Cof5(J,L)
        enddo
      enddo
!$acc  end region
c     ***** GPU *****

c     ***** Host *****
c     Same kernel as above, executed on the host as the reference.
c     Every case reassigns Cof1..Cof4 before reading them, so the
c     values left behind by the accelerator section do not matter.
      do J=1,N
        do L=1,M
          select case (CaseNos(J,L))
          case (1)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)+(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (2)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)-(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (3)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)+(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (4)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)-(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (5)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)+(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          case (6)
            Cof1(J,L)=1
            Cof2(J,L)=3
            Cof3(J,L)=50
            Cof4(J,L)=4
c      Cof5(J,L)=(-Cof3(J,L)-(sqrt(Cof3(J,L)*Cof3(J,L)
c     &           -4*Cof2(J,L)*Cof4(J,L))))*(0.5/Cof2(J,L))
            Cof2(J,L)=-(Cof1(J,L)+Cof5(J,L))/150
            Cof1(J,L)=1*Cof5(J,L)+1
            Cof3(J,L)=3*Cof5(J,L)+3
            Cof4(J,L)=4*Cof5(J,L)+4
          end select
          dummy2(J,L)=Cof1(J,L)+Cof2(J,L)+Cof3(J,L)
     &               +Cof4(J,L)+Cof5(J,L)
        enddo
      enddo
c     ***** Host *****

c     ***** Check *****
c     Compare with a relative tolerance rather than exact equality,
c     so that last-bit rounding differences between the two code
c     paths are not reported.  The test is written as
c     .not.(diff.le.limit) so that a NaN in either result is still
c     counted as a mismatch.
      NBAD=0
      do J=1,N
        do L=1,M
          diff=abs(dummy1(J,L)-dummy2(J,L))
          ref=max(abs(dummy1(J,L)),abs(dummy2(J,L)),1.0)
          if(.not.(diff.le.TOL*ref))then
            NBAD=NBAD+1
            print*,J,L,dummy1(J,L),dummy2(J,L)
          endif
        enddo
      enddo
      print*,'mismatches:',NBAD
c     ***** Check *****

c-----Program Ended
      end program test

I have fixed the problem by using an old compiler.

Posted: Fri Dec 11, 2009 11:57 am Post subject:


Hi Marco,

Code:
The problem is that it doesn’t execute what is inside the if statement.

This is a compiler bug. I just found it myself yesterday and reported it to our engineers as TPR#16426. I consider this as critical bug that must be fixed soon.

In the meantime, you might be able to work around the bug by using an undocumented flag “-ta=nvidia,oldcg”. In 10.0 we implemented a new code generator which does give better performance, but obviously still has a few problems. “oldcg” will use our previous code generator.

I apologize that our internal testing missed this error, and hopefully we can have it fixed by early next year.

  • Mat

Hi sinsin,

Yes, it looks like it might be the same bug since selects turn into if statements. I’ll test your code as well once we have a fix in place.

Thanks,
Mat

Hi sinsin,

FYI, I just verified that TPR#16426 has been fixed and your code works correctly with the 10.1 release of the PGI compilers. 10.1 should be available later this week.

  • Mat

Hi Mat,

Thank you so much for the notification!

sinsin