When I try to add a constant to a value stored in constant memory, I get what seems to be the wrong answer.
I am new to CUDA and am probably doing something wrong, but I am baffled and would appreciate someone setting me straight.
I have tried to keep the code example concise.
Thanks,
Dave
Emulator output (expected):
0 1.000000 +1 = 2.000000 ?
1 2.000000 +1 = 3.000000 ?
2 3.000000 +1 = 4.000000 ?
3 4.000000 +1 = 5.000000 ?
4 5.000000 +1 = 6.000000 ?
5 6.000000 +1 = 7.000000 ?
Release mode output:
0 1.000000 +1 = 1.000000 ?
1 2.000000 +1 = 1.000000 ?
2 3.000000 +1 = 1.000000 ?
3 4.000000 +1 = 1.000000 ?
4 5.000000 +1 = 1.000000 ?
5 6.000000 +1 = 1.000000 ?
Press ENTER to exit…
//
// Main Cuda routines for LLE Ray Trace
//
#include <stdlib.h> // System include files
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <cutil_inline.h> // Cuda utility routines that came with the examples
// Six-entry table in constant memory, read by the kernel one entry per thread.
// NOTE(review): the element type is double. Older nvcc targets below compute
// capability 1.3 demote double to float in device code, which would be consistent
// with the wrong "t+1" results reported for release builds -- confirm the build
// uses -arch=sm_13 (or newer), or switch the table and kernel to float.
__device__ __constant__ double my_consts[6];
// Copies each entry of my_consts into d1 and writes that entry plus one into d2.
//
// Launch layout: one thread per table entry, indexed by threadIdx.x only
// (blockIdx is not consulted), so a single block is expected.
//
// d1  output array; receives my_consts[tid] at index tid
// d2  output array; receives my_consts[tid] + 1 at index tid
__global__ void kernel(double* d1, double* d2)
{
    const unsigned int tid = threadIdx.x; // Ray Id

    // Threads past the end of the table have nothing to read; bail out instead
    // of indexing my_consts out of range when the block is larger than the table.
    if (tid >= sizeof(my_consts) / sizeof(my_consts[0]))
        return;

    const double t = my_consts[tid];
    d1[tid] = t;
    d2[tid] = t + 1;
}
// Forward declaration; the parameter types must match the definition below
// and the call in main (argv is char**, not char).
void runTest( int argc, char** argv);
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    // Run the constant-memory test, then hand control to the cutil exit helper.
    runTest(argc, argv);
    cutilExit(argc, argv);
    return 0;
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
// Uploads six doubles to my_consts, launches kernel with one thread per value,
// copies both result arrays back and prints "index value +1 = value_plus_one".
//
// argc, argv  command-line arguments; a "device" flag selects the CUDA device,
//             otherwise the device reported by cutGetMaxGflopsDeviceId() is used.
//
// NOTE(review): the device code works on double. If the build targets a compute
// capability below 1.3, older nvcc demotes double to float, which would explain
// a release build printing wrong sums -- confirm the -arch flag.
void
runTest( int argc, char** argv)
{
    //
    // Use command-line specified CUDA device, otherwise use device with highest Gflops/s
    //
    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
        cutilDeviceInit(argc, argv);
    else
        cutilSafeCall( cudaSetDevice( cutGetMaxGflopsDeviceId() ) );

    //
    // Allocate
    //
    double h_dat[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
    // Derive sizes from the host table instead of repeating the literal 48.
    const size_t bytes = sizeof(h_dat);
    const unsigned int count = sizeof(h_dat) / sizeof(h_dat[0]);

    cutilSafeCall( cudaMemcpyToSymbol(my_consts, h_dat, bytes, 0, cudaMemcpyHostToDevice) );

    double* h_out1 = (double*) malloc(bytes);
    double* h_out2 = (double*) malloc(bytes);
    if( h_out1 == NULL || h_out2 == NULL )
    {
        fprintf(stderr, "Host allocation failed\n");
        free(h_out1);
        free(h_out2);
        exit(EXIT_FAILURE);
    }

    double* d_dat1 = NULL;
    double* d_dat2 = NULL;
    cutilSafeCall( cudaMalloc( (void**) &d_dat1, bytes) );
    cutilSafeCall( cudaMalloc( (void**) &d_dat2, bytes) );

    dim3 grid(1, 1, 1);
    dim3 block(count, 1, 1);   // one thread per table entry

    //
    // Execute the kernel
    //
    kernel<<< grid, block >>>(d_dat1, d_dat2);
    cutilSafeCall( cudaThreadSynchronize() );
    cutilCheckMsg("Kernel execution failed");

    //
    // Copy result deposit back to host
    //
    cutilSafeCall( cudaMemcpy( h_out1, d_dat1, bytes, cudaMemcpyDeviceToHost) );
    cutilSafeCall( cudaMemcpy( h_out2, d_dat2, bytes, cudaMemcpyDeviceToHost) );

    for( unsigned int i = 0; i < count; i++)
    {
        // %u matches the unsigned loop index.
        printf("%u %f +1 = %f ?\n", i, h_out1[i], h_out2[i]);
    }

    //
    // Release device and host buffers before tearing down the context
    //
    cutilSafeCall( cudaFree(d_dat1) );
    cutilSafeCall( cudaFree(d_dat2) );
    free(h_out1);
    free(h_out2);

    cudaThreadExit();
}