cuda malloc and mecpy error or is it?

is it just me or am i going mad. i send out a vector to the GPU so that it can take the absolute value. the cpu version works fine. but everytime i send it to GPU version it sends me back the exact same vector.

im guessing that somewhere its just copying the input pointer to the output pointer.

heres my test function. just look at bold section

[codebox]#include <cuda.h>

#include <cuda_runtime_api.h>

#include “text_write.h”

#define ELEMENTS 100

int main (void){

PIX_TYPE *pixarray=NULL;

INT1_TYPE *int1array=NULL, *int1_GPU=NULL, *int1_CPU=NULL;

INT4_TYPE *int4array=NULL;

UINT1_TYPE *uint1array=NULL;

UINT4_TYPE *uint4array=NULL;

DBL_TYPE *dblarray=NULL;

int i=0;



/*create a matrix of ELEMENTS for each of above types randomly*/

pixarray=(PIX_TYPE *)malloc(sizeof(PIX_TYPE)*ELEMENTS);

for (i=0; i<ELEMENTS;i++){

	pixarray[i]=(PIX_TYPE)rand();

}

[b]int1array=(INT1_TYPE *)malloc(sizeof(INT1_TYPE)*ELEMENTS);

for (i=0; i<ELEMENTS;i++){

	int1array[i]=(INT1_TYPE)rand();/*create random array*/

	if (i%2==0)/*for every second item*/

		int1array[i]=-int1array[i];/*make number negative*/

}

[/b]

int4array=(INT4_TYPE *)malloc(sizeof(INT4_TYPE)*ELEMENTS);

for (i=0; i<ELEMENTS;i++){

	int4array[i]=(INT4_TYPE)rand();

}

uint1array=(UINT1_TYPE *)malloc(sizeof(UINT1_TYPE)*ELEMENTS);

for (i=0; i<ELEMENTS;i++){

	uint1array[i]=(UINT1_TYPE)rand();

}

uint4array=(UINT4_TYPE *)malloc(sizeof(UINT4_TYPE)*ELEMENTS);

for (i=0; i<ELEMENTS;i++){

	uint4array[i]=(UINT4_TYPE)rand();

}

[b]int1_CPU=(INT1_TYPE *)calloc(ELEMENTS,sizeof(INT1_TYPE));

int1_GPU=(INT1_TYPE *)calloc(ELEMENTS,sizeof(INT1_TYPE));[/b]

//for (i=0; i<ELEMENTS;i++){

//	int1array[i]=(INT1_TYPE)rand()/rand())-5;

//}

[b]if (l_abs_gpu((void *)int1array,INT1_CODE, int1_GPU, ELEMENTS))/*l_abs_gpu returns 1 if int1array and int1GPU are the same pointer*/

	printf("Error\n");

l_abs((void *)int1array,INT1_CODE, int1_CPU, ELEMENTS);[/b]

text_write("int1 array input 2nd try", INT1_CODE, int1array);

text_write("int1 abs for GPU", INT1_CODE, int1_GPU);

text_write("int1 abs for CPU", INT1_CODE, int1_CPU);

free(int1_CPU);

free(int1_GPU);

free(pixarray);

free(int1array);

free(int4array);

free(uint1array);

free(uint4array);

free(dblarray);

return 1;

}[/codebox]

Heres my kernel:

[codebox]#include <cuda.h>

#include <cuda_runtime_api.h>

#include <liarp.h>

#include “arith_const_type.h”

//prototypes

global void abs_ (void *in1, int code_in, long elements);

global void double_ (INT1_TYPE *data, int elements);

int l_abs_gpu(void in, / ptr to first input image */

  int code_in,  /* code of the input pixel type */

  void *out,    /* ptr to output image */

  long nbpix)   /* size of input & ouput in pixels */

{

INT1_TYPE    *ci;

INT1_TYPE *dev_ci;

INT4_TYPE    *ii, *dev_ii=NULL;

DBL_TYPE     *di, *dev_di=NULL;

dim3 dimBlock(512,1 );

dim3 dimGrid(round_up(nbpix,dimBlock.x),1);

[b] if (code_in == INT1_CODE) {

	ci = (INT1_TYPE *)in;

	cudaMalloc((void **)&dev_ci, sizeof(INT1_TYPE)*nbpix);/*allocate memory in GPU*/

	cudaMemcpy(dev_ci,ci,sizeof(INT1_TYPE)*nbpix,cudaMemcpyHostT

oDevice);/copy to GPU/

	double_<<<dimGrid, dimBlock>>>(dev_ci, nbpix);

	//abs_<<<dimGrid, dimBlock>>>(dev_ci, code_in, nbpix);/*get absolute value of elements*/

	cudaMemcpy(out,dev_ci,sizeof(INT1_TYPE)*nbpix,cudaMemcpyDevi

ceToHost);/copy back to normal memory/

	cudaFree(dev_ci);/*free cuda pointer*/

}[/b]

else if (code_in == INT4_CODE) {

	ii = (INT4_TYPE *)in;

	cudaMalloc((void **)&dev_ii, sizeof(INT4_TYPE)*nbpix);/*allocate memory in GPU*/

	cudaMemcpy(dev_ii,ii,sizeof(INT4_TYPE)*nbpix,cudaMemcpyHostT

oDevice);/copy to GPU/

	abs_<<<dimGrid, dimBlock>>>(dev_ii, code_in, nbpix);/*get absolute value of elements*/

	cudaMemcpy(out,dev_ii,sizeof(INT4_TYPE)*nbpix,cudaMemcpyDevi

ceToHost);/copy back to normal memory/

	cudaFree(dev_ii);/*free cuda pointer*/

}

else if(code_in == DBL_CODE) {

	di = (DBL_TYPE *)in;

	CUDA_SAFE_CALL(cudaMalloc((void **)&dev_di, sizeof(DBL_TYPE)*nbpix));/*allocate memory in GPU*/

	CUT_CHECK_ERROR("cudaMalloc failed");

	CUDA_SAFE_CALL(cudaMemcpy(dev_di,di,sizeof(DBL_TYPE)*nbpix,c

udaMemcpyHostToDevice));/copy to GPU/

	CUT_CHECK_ERROR("cudaMemcpy failed");

	abs_<<<dimGrid, dimBlock>>>(dev_di, code_in, nbpix);/*get absolute value of elements*/

	CUDA_SAFE_CALL(cudaMemcpy(out,dev_di,sizeof(DBL_TYPE)*nbpix,

cudaMemcpyDeviceToHost));/copy back to normal memory/

	CUT_CHECK_ERROR("cudaMemcpy failed");

	cudaFree(dev_di);/*free cuda pointer*/

}	

else {

  LIARerror("Input type not supported\n");

  return(1);

}

if (out==in)

	return 1;

return 0;

}

global void abs_ (void *in1, int code_in, long elements){

INT1_TYPE 	*ci;/*same as char*/

INT4_TYPE 	*ii;/*same as int*/

DBL_TYPE	*di;/*same as double*/

int i = blockIdx.x * blockDim.x + threadIdx.x;/*get thread number*/

[b] if (i<elements){/if less than elements/

	if (code_in == INT1_CODE ) {

		ci=(INT1_TYPE *)in1;

		if (ci[i]<0)/*if negative number*/

			ci[i]=-ci[i];/*times by minus one*/

		//ci[i] = ci[i] >= 0 ? ci[i] : -ci[i];

	} [/b]

	else if (code_in == INT4_CODE) {

		ii = (INT4_TYPE *)in1;

		ii[i] = ii[i] >= 0 ? ii[i] : -ii[i];

	} 

	else if (code_in == DBL_CODE) {

		di = (DBL_TYPE *)in1;

		di[i] = di[i] >= 0 ? di[i] : -di[i];

	}

}

}

[b]/this is an alternate function to abs that i created that should double the values/

global void double_ (INT1_TYPE *data, int elements){

int i = blockIdx.x * blockDim.x + threadIdx.x;/*get thread no*/

if (i<elements){

	data[i]=2*data[i];

}

}[/b][/codebox]

i doubt you will need it but the text_write function is as follows:

[codebox]void text_write(const char *msg, int type, void *data){

FILE *file=NULL;

PIX_TYPE *pixarray=NULL;

INT1_TYPE *int1array=NULL;

INT4_TYPE *int4array=NULL;

UINT1_TYPE *uint1array=NULL;

UINT4_TYPE *uint4array=NULL;

DBL_TYPE *dblarray=NULL;

int i=0;

file=fopen("test.txt","a");	

fprintf(file, msg);/*type in the message as required*/

switch (type){

	case PIX_CODE:

		for (i=0; i<100;i++){

			if (i%10==0)

				fprintf(file,"\n"); /*insert new line every 10 lines*/

			fprintf(file,"%d\t",((PIX_TYPE *)data)[i]);

		}

		break;

	case INT1_CODE:

		for (i=0; i<100;i++){

			if (i%10==0)

				fprintf(file,"\n"); /*insert new line every 10 lines*/

			fprintf(file,"%d\t",((INT1_TYPE *)data)[i]);

		}

		break;

	case INT4_CODE:

		for (i=0; i<100;i++){

			if (i%10==0)

				fprintf(file,"\n"); /*insert new line every 10 lines*/

			fprintf(file,"%d\t",((INT4_TYPE *)data)[i]);

		}

		break;

	//case UINT1_CODE:

	//	for (i=0; i<100;i++){

	//		if (i%10==0)

	//			fprintf(file,"\n"); /*insert new line every 10 lines*/

	//		fprintf(file,"%d\t",((UINT1_TYPE *)data)[i]);

	//	}

	//	break;

	case UINT4_CODE:

		for (i=0; i<100;i++){

			if (i%10==0)

				fprintf(file,"\n"); /*insert new line every 10 lines*/

			fprintf(file,"%d\t",((UINT4_TYPE *)data)[i]);

		}

		break;

	case DBL_CODE:

		for (i=0; i<100;i++){

			if (i%10==0)

				fprintf(file,"\n"); /*insert new line every 10 lines*/

			fprintf(file,"%.4g\t",((DBL_TYPE *)data)[i]);

		}

		break;

	default:

		printf("Error:wrong code\n");

		break;

}

fprintf(file,"\n"); /*insert new line*/

fclose(file);

}[/codebox]

and my errors are

[codebox]‘LIAR.exe’: Loaded ‘C:\Documents and Settings\abe044\My Documents\Visual Studio 2005\liar_arith\LIAR\win32\LIAR\Debug\LIAR.exe’, Symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\ntdll.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\kernel32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\CUDA\bin\cudart.dll’, Binary was not built with debug information.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\user32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\gdi32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\shimeng.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\imm32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\advapi32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\rpcrt4.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\secur32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\lpk.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\usp10.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\msvcrt.dll’, No symbols loaded.

‘LIAR.exe’: Unloaded ‘C:\WINDOWS\system32\shimeng.dll’

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\nvcuda.dll’, Binary was not built with debug information.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\nvapi.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\ole32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\oleaut32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\shlwapi.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\shell32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\setupapi.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\version.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\WinSxS\x86_Microsoft.Windows.Common-Controls_6595b64144ccf1df_6.0.2600.2982_x-ww_ac3f9c03\comctl32.dll’, No symbols loaded.

‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\comctl32.dll’, No symbols loaded.

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…

First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError at memory location 0x0013fd4c…

[/codebox]

sry about the length of this post but im really desperate for some answers

ok well aftert some research iver narrowed the error down to one POSSIBLE ERROR. tell me if im right… i cant test it out myself since i dont have a nvidia card at home. and all work done above was at work.

neways… seems something to do with the fact that im not using cudaSymbolCpy?? from what i understnd it seems that its telling me that im sending a CPU pointer to the GPU (which i get is nto what your meant to do). what i thought would happen is if you declare say GPU_pointer=NULL then you can just give it a GPU address at will. any thoughts?