Hmm, so I changed my code and did it this way. Now the array does not get updated with the new values. Any suggestions?
[codebox]// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <assert.h>
// includes, project
#include <cufft.h>
#include <cutil.h>
// Polls the CUDA runtime for the most recent error and terminates the
// program if one is pending.
//   msg: caller-chosen label printed ahead of the runtime's error text so
//        the failing step can be identified.
// On error the message goes to stderr and the process exits with status -1;
// otherwise the function simply returns.
void checkCUDAError(const char *msg)
{
    const cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        return; // nothing pending
    }

    fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status));
    exit(-1);
}
// Kernel that executes on the CUDA device: adds 10 to each of the first N
// elements of a.
//   a: device pointer to at least N doubles, updated in place.
//   N: number of elements to process.
// Expects a 1D grid of 1D blocks with gridDim.x * blockDim.x >= N; each thread
// handles at most one element, and surplus threads do nothing.
// NOTE(review): the element type is double, so the file presumably has to be
// built for a target with double-precision support - confirm the -arch flag.
__global__ void alloc_array(double *a, int N)
{
    // Flat global index of this thread across the whole grid.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the tail: the grid rarely divides N evenly.
    if (idx < N)
    {
        a[idx] = a[idx] + 10;
        // printf("blockIdx.x =%d, blockDim.x = %d, threadIdx.x = %d\n",blockIdx.x, blockDim.x, threadIdx.x );
    }
}
// Host driver. Selects device 0, zero-fills four pinned host arrays (U, V,
// Uin, Vin), mirrors them into device memory, runs alloc_array on U_d, copies
// U back and prints all four host arrays.
// Returns 0 on success and 1 when no usable CUDA device is found; a CUDA
// failure detected by checkCUDAError terminates the process instead.
int main()
{
    /******************** Get Device info *******************/
    // check the compute capability of the device
    int cuda_device = 0;
    int num_devices = 0;

    CUDA_SAFE_CALL( cudaGetDeviceCount(&num_devices) );
    if (0 == num_devices)
    {
        printf("your system does not have a CUDA capable device\n");
        return 1;
    }

    // check if the chosen device ID is within range, exit if not
    if (cuda_device >= num_devices)
    {
        printf("choose device ID between 0 and %d\n", num_devices-1);
        return 1;
    }

    CUDA_SAFE_CALL( cudaSetDevice(cuda_device) );

    cudaDeviceProp device_properties;
    CUDA_SAFE_CALL( cudaGetDeviceProperties(&device_properties, cuda_device) );
    if ((1 == device_properties.major) && (device_properties.minor < 1))
    {
        printf("%s does not have compute capability 1.1 or later\n\n", device_properties.name);
    }

    // alloc_array operates on double. NOTE(review): per the CUDA programming
    // guide, double precision needs compute capability 1.3 or later and a
    // matching build flag (e.g. -arch=sm_13); toolkits of that generation
    // otherwise demote double to float in device code, so the kernel would
    // misinterpret the 8-byte elements and U would not show the expected
    // values. Confirm against the toolkit actually in use.
    if ((1 == device_properties.major) && (device_properties.minor < 3))
    {
        fprintf(stderr,
                "warning: %s lacks double precision support (needs compute capability 1.3 or later)\n",
                device_properties.name);
    }

    printf("running on: %s\n\n", device_properties.name );
    /******************* acquired device info *************/

    int sizex, sizey, sizez;
    sizex = 43;
    sizey = 72;
    sizez = sizex * sizey;

    // Size in bytes of every array used below.
    const size_t bytes = sizeof(double) * sizez;
    int v;

    /* fill zeros for U,V,Uin,Vin (pinned host memory) */
    double *U, *V, *Uin, *Vin;
    CUDA_SAFE_CALL(cudaMallocHost((void**)&U, bytes));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&V, bytes));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Uin, bytes));
    CUDA_SAFE_CALL(cudaMallocHost((void**)&Vin, bytes));
    // Check for any CUDA errors
    checkCUDAError("cudaMallocHost calls");

    for (v = 0; v < sizez; ++v)
    {
        U[v] = 0;
        V[v] = 0;
        Uin[v] = 0;
        Vin[v] = 0;
    }

    // Device-side mirrors of the four host arrays.
    double *U_d, *V_d, *Uin_d, *Vin_d;
    CUDA_SAFE_CALL(cudaMalloc((void**)&U_d, bytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&V_d, bytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&Uin_d, bytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&Vin_d, bytes));
    // Check for any CUDA errors
    checkCUDAError("cudaMalloc calls");

    CUDA_SAFE_CALL(cudaMemcpy(U_d, U, bytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaMemcpy(V_d, V, bytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaMemcpy(Uin_d, Uin, bytes, cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL(cudaMemcpy(Vin_d, Vin, bytes, cudaMemcpyHostToDevice));
    // Check for any CUDA errors
    checkCUDAError("cudaMemcpy calls");

    // Do calculation on device: fixed number of blocks, with the block size
    // rounded up so that n_blocks * block_size covers every element.
    int n_blocks = 30;
    int block_size = (sizez + n_blocks - 1) / n_blocks;
    printf("n_blocks = %d\t block_size = %d\n", n_blocks, block_size);

    alloc_array <<< n_blocks, block_size >>> (U_d, sizez);
    // A launch does not return a status; a bad configuration only shows up
    // through cudaGetLastError, which checkCUDAError polls.
    checkCUDAError("alloc_array launch");

    // Copy U_d to host. This blocking copy also waits for the kernel, so an
    // error raised while the kernel ran is reported here.
    CUDA_SAFE_CALL(cudaMemcpy(U, U_d, bytes, cudaMemcpyDeviceToHost));
    checkCUDAError("cudaMemcpy device to host");

    // Check values on host
    for (v = 0; v < sizez; ++v)
    {
        printf("U[%d] = %f, V[%d] = %f, Uin[%d] = %f, Vin[%d] = %f \n", v, U[v], v, V[v], v, Uin[v], v, Vin[v]);
    }

    // Device allocations are released with cudaFree.
    CUDA_SAFE_CALL(cudaFree(U_d));
    CUDA_SAFE_CALL(cudaFree(Uin_d));
    CUDA_SAFE_CALL(cudaFree(V_d));
    CUDA_SAFE_CALL(cudaFree(Vin_d));

    // Pinned host buffers from cudaMallocHost must go through cudaFreeHost,
    // not cudaFree.
    CUDA_SAFE_CALL(cudaFreeHost(U));
    CUDA_SAFE_CALL(cudaFreeHost(V));
    CUDA_SAFE_CALL(cudaFreeHost(Uin));
    CUDA_SAFE_CALL(cudaFreeHost(Vin));
    checkCUDAError("cudaFree / cudaFreeHost calls");

    return 0;
}
[/codebox]