cudaMemcpy not working?

hi all

any help will be appreciated

[codebox]#include <stdio.h>

#include <stdlib.h>

#include <malloc.h>

#include “text_write.h”

#define ELEMENTS 100

/*
 * Test driver: fills an array with pseudo-random integers, hands it to
 * test() and dumps both the input and the resulting output via text_write().
 *
 * Returns EXIT_SUCCESS when test() reports success (0), EXIT_FAILURE otherwise,
 * so the process exit status reflects whether the run actually worked.
 */
int main (void){
    int intarray[ELEMENTS];
    /* Zero-filled so the dump below prints defined values even when test()
     * fails before writing anything into the output array. */
    int int1_GPU[ELEMENTS] = {0};
    int i;
    int failed;

    /* Fill the input with pseudo-random values. rand() is not seeded here,
     * so every run produces the same sequence. */
    for (i = 0; i < ELEMENTS; i++){
        intarray[i] = (int)rand();
    }

    /* test() returns non-zero on failure. */
    failed = test(intarray, int1_GPU, ELEMENTS);
    if (failed)
        printf("Error\n");

    /* Both arrays are still written on failure so they can be compared. */
    text_write("Input array", NEW_CODE, intarray);
    text_write("GPU output", NEW_CODE, int1_GPU);

    return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}[/codebox]

my cuda file:

#include “cuda.h”

#include <cuda_runtime_api.h>

#include “text_write.h”

#include “cutil.h”

#include <stdio.h>

/* Forward declaration of the kernel defined further down in this file.
 * The qualifier must be spelled __global__ (with underscores) for nvcc to
 * treat the function as a kernel that can be launched with <<<...>>>. */
__global__ void test_func (int *data, int elements);

/* Prints `what` together with the CUDA error string when `err` is a failure.
 * Returns 0 for cudaSuccess and 1 for anything else. */
static int report_cuda_error(cudaError_t err, const char *what){
    if (err == cudaSuccess)
        return 0;
    fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(err));
    return 1;
}

/*
 * Copies `nbpix` ints from the host array `in` to the GPU, runs test_func over
 * them, and copies the result back into the host array `out`.
 *
 *   in     host input array holding at least nbpix ints
 *   out    host output array with room for at least nbpix ints
 *   nbpix  number of elements to process
 *
 * Returns 0 on success and 1 on any failure (negative nbpix, allocation,
 * copy, or kernel launch/execution error). Every CUDA call is checked
 * explicitly and the outcome is returned, so the caller's `if (test(...))`
 * actually sees failures.
 */
int test(int *in, int *out, int nbpix){
    const int threadsPerBlock = 256;
    int *dev_i = NULL;
    int failed;
    size_t bytes;

    if (nbpix < 0)
        return 1;
    if (nbpix == 0)
        return 0;   /* nothing to do; an empty grid is not a valid launch */

    bytes = sizeof(int) * (size_t)nbpix;

    /* Enough blocks to cover every element (ceiling division) instead of a
     * single fixed block, so inputs larger than one block are handled too. */
    dim3 dimBlock(threadsPerBlock, 1);
    dim3 dimGrid((nbpix + threadsPerBlock - 1) / threadsPerBlock, 1);

    /* allocate memory on the GPU */
    if (report_cuda_error(cudaMalloc((void **)&dev_i, bytes), "cudaMalloc failed"))
        return 1;

    /* copy to GPU */
    failed = report_cuda_error(
        cudaMemcpy(dev_i, in, bytes, cudaMemcpyHostToDevice),
        "cudaMemcpy failed");

    if (!failed){
        test_func<<<dimGrid, dimBlock>>>(dev_i, nbpix);
        /* A launch does not return a status; a bad configuration or a
         * missing/unsupported device only shows up through cudaGetLastError(). */
        failed = report_cuda_error(cudaGetLastError(), "Kernel failed to launch");
    }

    if (!failed){
        /* copy back to host memory; this blocking copy also reports errors
         * raised while the kernel was executing */
        failed = report_cuda_error(
            cudaMemcpy(out, dev_i, bytes, cudaMemcpyDeviceToHost),
            "cudaMemcpy failed");
    }

    /* free the device buffer on every path once it has been allocated */
    if (report_cuda_error(cudaFree(dev_i), "cudaFree failed"))
        failed = 1;

    return failed;
}

/*
 * Kernel: writes the value 2 into each of the first `elements` entries of `data`.
 *
 *   data      device pointer to at least `elements` ints
 *   elements  number of entries to overwrite
 *
 * Uses a 1D launch (only the x dimension of the grid and block is read).
 * Each thread starts at its global index and advances by the total number of
 * launched threads, so all entries are written for any grid/block size,
 * including a single block smaller than `elements`.
 */
__global__ void test_func (int *data, int elements){
    const long long stride = (long long)gridDim.x * blockDim.x;
    long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    /* The bound check keeps surplus threads (and non-positive sizes) from
     * touching memory outside the array. */
    for (; i < elements; i += stride){
        data[i] = 2;   /* simply write 2 in the array */
    }
}

Can someone please tell me what I'm doing wrong here? Every time I call cudaMemcpy(dev_i,in,sizeof(int)*nbpix,cudaMemcpyHostToDevice) and then cudaMemcpy(out,dev_i,sizeof(int)*nbpix,cudaMemcpyDeviceToHost), the only thing that happens is that the input vector is copied onto the output vector. Is memcpy going berserk??

gahhh… this thing is driving me mad… I honestly cannot find the mistake… the output is supposed to be just an int array[100] where all elements are 2.

any help at all will be appreciated

sachin

tried it and this code does exactly what you want it to…

it fills the output array with 2…

why do u say it doesn’t work ??

are you checking the right values… ?? check your text_write.h to see if you are writing out the correct values ??

no way??? lol- i spent whole day and it didnt work.

btw did u run this in emulation mode- cos it worked for me in emulation just not in actual mode

nope ran it on a quadro fx 5600

it ran absolutely fine…instead of using ur main function i wrote up a simpler one just to call the test - function and checked the output array by printing it out…

it was fine… so i i suspected the text_write.h file, did you check the output array within the test function ??

hmm… it worked in emulation mode, so maybe the kernel failed to execute on your device ?? CUDA has this very bad habit of being silent about a kernel failing to execute… it doesn't complain about it in any way… so you would never know what happened unless you check for the error yourself…

btw which card are using ??

just try including this right after the kernel launch in your test function… cutilCheckMsg("Kernel failed to execute !!")

maybe that might help ?