hi all
any help will be appreciated
[codebox]#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include “text_write.h”
#define ELEMENTS 100
int main (void){
int intarray[ELEMENTS], int1_GPU[ELEMENTS];
int i=0;
/*create a matrix of ELEMENTS for each of above types randomly*/
for (i=0; i<ELEMENTS;i++){
intarray[i]=(int)rand();/*create random array*/
}
if (test(intarray,int1_GPU, ELEMENTS))
printf("Error\n");
text_write("Input array", NEW_CODE, intarray);
text_write("GPU output", NEW_CODE, int1_GPU);
return 1;
}[/codebox]
my cuda file:
#include “cuda.h”
#include <cuda_runtime_api.h>
#include “text_write.h”
#include “cutil.h”
#include <stdio.h>
global void test_func (int *data, int elements);
int test(int *in, int *out, int nbpix){
int *dev_i=NULL;
dim3 dimBlock(512,1 );
dim3 dimGrid(1,1);
CUDA_SAFE_CALL(cudaMalloc((void **)&dev_i, sizeof(int)*nbpix));/*allocate memory in GPU*/
CUT_CHECK_ERROR("cudaMalloc failed");
CUDA_SAFE_CALL(cudaMemcpy(dev_i,in,sizeof(int)*nbpix,cudaMem
cpyHostToDevice));/copy to GPU/
CUT_CHECK_ERROR("cudaMemcpy failed");
test_func<<<dimGrid, dimBlock>>>(dev_i, nbpix);
CUDA_SAFE_CALL(cudaMemcpy(out,dev_i,sizeof(int)*nbpix,cudaMe
mcpyDeviceToHost));/copy back to normal memory/
CUT_CHECK_ERROR("cudaMemcpy failed");
cudaFree(dev_i);/*free cuda pointer*/
return 0;
}
global void test_func (int *data, int elements){
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i<elements){
data[i]=2;/*simply write 2 in the array*/
}
}
can someone please tell me what im doing wrong here. everytime i do
cudaMemcpy(dev_i,in,sizeof(int)*nbpix,cudaMemcpyHostToDevice
) and cudaMemcpy(out,dev_i,sizeof(int)*nbpix,cudaMemcpyDeviceToHos
t) the only thing that happens is that the input vector is copied on to the output vector. is memcpy going bezerk??
gahhh… driving me mad this thing… i honestly cannot find the mistake… the ouputs supposed to be only a int array[100] where all elements are 2.
any help at all will be appreciated
sachin