I allocated a global-memory variable in a host function and changed its value in a device function.
But the value was not changed.
I don't know the reason.
My system:
OS: Fedora 8
CUDA version: 2.1
Device: Tesla C870
#include <stdio.h>
#include <stdlib.h>
/* Number of thread blocks in the launch grid (first <<<...>>> argument in main). */
#define BLOCK_SIZE 4
/* Number of threads in each block (second <<<...>>> argument in main). */
#define THREAD_SIZE 10
/*
 * Total number of threads launched. Parenthesized so the macro expands as a
 * single value inside larger expressions (e.g. `x / TOTAL_THREAD`).
 */
#define TOTAL_THREAD (BLOCK_SIZE * THREAD_SIZE)
/* Number of int elements in the host and device debug buffers. */
#define DEBUG_SIZE 1000
/*
 * Fill debug[0 .. DEBUG_SIZE) so that every element holds the global id of the
 * thread that wrote it.
 *
 * The array is split into gridDim.x contiguous slices (one per block), and each
 * slice is split again into blockDim.x contiguous sub-ranges (one per thread).
 * The split is derived from the actual launch geometry rather than from
 * compile-time constants, so every element is written exactly once for any
 * 1-D launch configuration.
 *
 * Launch layout: 1-D grid of 1-D blocks; no shared memory is used.
 * Precondition:  debug points to at least DEBUG_SIZE ints of device memory, and
 *                gridDim.x * DEBUG_SIZE fits in an unsigned int.
 */
__global__ void test(int *debug)
{
    int i;
    int mi_start, mi_end;             /* slice owned by this block: [mi_start, mi_end) */
    int mi_local_start, mi_local_end; /* sub-range owned by this thread                */
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    /*
     * Integer division gives the last block an end of exactly DEBUG_SIZE
     * (gridDim.x * DEBUG_SIZE / gridDim.x), so no special case is required.
     */
    mi_start = (blockIdx.x * DEBUG_SIZE) / gridDim.x;
    mi_end = ((blockIdx.x + 1) * DEBUG_SIZE) / gridDim.x;

    /* Same proportional split, applied to the threads inside the block. */
    mi_local_start = mi_start + (threadIdx.x * (mi_end - mi_start)) / blockDim.x;
    mi_local_end = mi_start + ((threadIdx.x + 1) * (mi_end - mi_start)) / blockDim.x;

    for (i = mi_local_start; i < mi_local_end; i++)
    {
        debug[i] = tid;
    }
}
/*
 * Report a failed CUDA runtime call on stderr.
 * Returns 1 when status is an error, 0 when it is cudaSuccess.
 */
static int cudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 0;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Host driver: fills a host buffer with 1s, copies it to the device, runs the
 * `test` kernel over it, copies the result back and prints every element.
 *
 * Every allocation, copy, launch and synchronization is checked. Without these
 * checks a failure anywhere in the chain is silent and the program simply
 * prints the initial value 1 for every element.
 *
 * Returns EXIT_SUCCESS (0) when all steps succeed, EXIT_FAILURE otherwise.
 */
int main()
{
    const size_t bytes = sizeof(int) * DEBUG_SIZE;
    int *debug = NULL, *g_debug = NULL;
    int exit_code = EXIT_FAILURE;
    int i;

    debug = (int *)malloc(bytes);
    if (debug == NULL)
    {
        fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)bytes);
        goto cleanup;
    }
    for ( i = 0; i < DEBUG_SIZE; i++ )
        debug[i] = 1;

    if (cudaFailed(cudaMalloc((void**)&g_debug, bytes), "cudaMalloc"))
    {
        g_debug = NULL; /* do not free a pointer the runtime never handed out */
        goto cleanup;
    }
    if (cudaFailed(cudaMemcpy(g_debug, debug, bytes, cudaMemcpyHostToDevice),
                   "host-to-device copy"))
        goto cleanup;

    test<<< BLOCK_SIZE, THREAD_SIZE >>>(g_debug);
    /* A launch returns nothing; configuration errors are fetched explicitly. */
    if (cudaFailed(cudaGetLastError(), "kernel launch"))
        goto cleanup;
    /*
     * Errors raised while the kernel runs surface at the next synchronizing call.
     * NOTE: cudaThreadSynchronize is deprecated since CUDA 4.0; use
     * cudaDeviceSynchronize when building with a newer toolkit.
     */
    if (cudaFailed(cudaThreadSynchronize(), "kernel execution"))
        goto cleanup;

    if (cudaFailed(cudaMemcpy(debug, g_debug, bytes, cudaMemcpyDeviceToHost),
                   "device-to-host copy"))
        goto cleanup;

    for ( i = 0; i < DEBUG_SIZE; i++ )
        printf("debug[%d] = %d\n", i, debug[i]);
    exit_code = EXIT_SUCCESS;

cleanup:
    /* Single exit path so both buffers are released on success and on failure. */
    free(debug);
    if (g_debug != NULL)
        cudaFree(g_debug);
    return exit_code;
}