OK, I don’t understand why the code at the bottom of this post produces different results depending on how it is compiled. The results from,
nvcc -I /opt/NVIDIA_CUDA_SDK/common/inc/ Main.cu -deviceemu
are,
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
---------------
host: (4,4)
host: (4,4)
host: (4,4)
host: (4,4)
host: (4,4)
as I would expect. The results from
nvcc -I /opt/NVIDIA_CUDA_SDK/common/inc/ Main.cu -g -G
are,
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
---------------
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
which is not what I’d expect. The results from just,
nvcc -I /opt/NVIDIA_CUDA_SDK/common/inc/ Main.cu
are,
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
host: (2,2)
---------------
host: (512,2)
host: (512,2)
host: (512,2)
host: (512,2)
host: (512,2)
So what have I done wrong?
Thanks very much.
#include <stdio.h>
#include <assert.h>
#include <cuda.h>
#include <cutil.h>
/**
 * Doubles both components of each of the first N elements of vaid, in place.
 *
 * Launch layout: one element per thread along x. The element index is
 * blockIdx.x * blockDim.x + threadIdx.x, and threads whose index is >= N do
 * nothing, so the launch may cover more threads than there are elements.
 * The y/z dimensions are not used for indexing; launch with them set to 1.
 *
 * vaid must point to at least N double2 values in device memory.
 *
 * NOTE(review): this kernel does double-precision arithmetic. On toolchains
 * whose default target predates compute capability 1.3, nvcc demotes double
 * to float in device code unless an -arch of sm_13 or newer is given; the
 * host then misreads the buffer (output such as "(512,2)"). Confirm the
 * build passes a suitable -arch flag for the target GPU.
 */
__global__ void testDouble2(double2* vaid, int N) {
    // Flat global index so the kernel stays correct for multi-block launches.
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the tail: the launch rarely matches N exactly.
    if (i >= N) {
        return;
    }

    // Read once, update in registers, write once.
    double2 v = vaid[i];
    v.x += v.x;
    v.y += v.y;
    vaid[i] = v;
}
/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * err  - status returned by the runtime call (or by cudaGetLastError()).
 * what - short label for the step, used in the message.
 *
 * Returns true when err is cudaSuccess, false otherwise.
 */
static bool cudaOk(cudaError_t err, const char* what) {
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return false;
}

/**
 * Fills a small host array of double2 with (2,2), prints it, doubles every
 * component on the device with testDouble2, copies the result back and
 * prints it again.
 *
 * Every CUDA runtime call is checked; on failure a message is written to
 * stderr and the second print is skipped, so stale or garbage host data is
 * never presented as a result.
 *
 * Returns 0 on success, 1 if any CUDA call failed.
 */
int main(void) {
    double2 host[] = {
        make_double2(2, 2),
        make_double2(2, 2),
        make_double2(2, 2),
        make_double2(2, 2),
        make_double2(2, 2),
    };
    // Derive the sizes from the array so they cannot drift apart.
    const int count = (int)(sizeof(host) / sizeof(host[0]));
    const size_t bytes = sizeof(host);

    for (int i = 0; i < count; i++) {
        printf("host: (%g,%g)\n", host[i].x, host[i].y);
    }
    printf("---------------\n");

    double2* device = NULL;
    if (!cudaOk(cudaMalloc((void**)&device, bytes), "cudaMalloc")) {
        return 1;
    }

    int status = 1;
    if (cudaOk(cudaMemcpy(device, host, bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy (host to device)")) {
        dim3 threads(count, 1);
        dim3 grid(1, 1);
        testDouble2<<< grid, threads >>>(device, count);

        // A launch does not return a status: cudaGetLastError() reports a bad
        // launch configuration, and the blocking copy that follows reports
        // any failure that happened while the kernel was running.
        if (cudaOk(cudaGetLastError(), "testDouble2 launch") &&
            cudaOk(cudaMemcpy(host, device, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy (device to host)")) {
            for (int i = 0; i < count; i++) {
                printf("host: (%g,%g)\n", host[i].x, host[i].y);
            }
            status = 0;
        }
    }

    // Release the device buffer on every path that allocated it.
    if (!cudaOk(cudaFree(device), "cudaFree")) {
        status = 1;
    }
    return status;
}