I have a Quadro P600 GPU, but it doesn't work here.
call sgemmNN_16x16<<<blocks, threads>>>(dA, dB, dC, m, N, k, alpha, beta)
end do
istat = cudaEventRecord(stop, 0)
istat = cudaThreadSynchronize()
istat = cudaEventElapsedTime(time, start, stop)
time = time / (NREPS*1.0e3)
C = dC
nerrors = 0
do j = 1, N
do i = 1, N
if (abs(gold(i,j) - C(i,j)) .gt. 1.0e-4) then
nerrors = nerrors + 1
end if
end do
end do
if (nerrors .eq. 0) then
print ,“Test passed!”
Else
print ,nerrors, " errors were encountered"
endif
gflops = 2.0 * N * N * N/time/1e9
write (,901) m,k,k,N,time1.0e3,gflops
900 format(’\nDevice:’,a,’, ‘,f6.1,’ MHz clock, ‘,f6.1,’ MB memory.\n’)
901 format(i0,‘x’,i0,’ * ‘,i0,‘x’,i0,’:\t’,f8.3,’ ms\t’,f8.3,’ GFlops/s’)
end program
\nDevice:Quadro P600, 1556.5 MHz clock, 2048.0 MB memory.\n
65536 errors were encountered
256x256 * 256x256:\t 0.000 ms\t******** GFlops/s
请按任意键继续. . .[/url]
Hi eggbaby123456,
What compiler flags did you use to compile?
Since the Quadro P600 is a Pascal based device, you’ll need to compile with “-Mcuda=cc60” to enable Pascal code generation.
-Mat
Hi mkcolg
Thank you for helping me, but I don’t know how to set the compiler flag “-Mcuda=cc60”. Could you please help me again?