is it just me or am i going mad. i send out a vector to the GPU so that it can take the absolute value. the cpu version works fine. but everytime i send it to GPU version it sends me back the exact same vector.
im guessing that somewhere its just copying the input pointer to the output pointer.
heres my test function. just look at bold section
[codebox]#include <cuda.h>
#include <cuda_runtime_api.h>
#include “text_write.h”
#define ELEMENTS 100
int main (void){
PIX_TYPE *pixarray=NULL;
INT1_TYPE *int1array=NULL, *int1_GPU=NULL, *int1_CPU=NULL;
INT4_TYPE *int4array=NULL;
UINT1_TYPE *uint1array=NULL;
UINT4_TYPE *uint4array=NULL;
DBL_TYPE *dblarray=NULL;
int i=0;
/*create a matrix of ELEMENTS for each of above types randomly*/
pixarray=(PIX_TYPE *)malloc(sizeof(PIX_TYPE)*ELEMENTS);
for (i=0; i<ELEMENTS;i++){
pixarray[i]=(PIX_TYPE)rand();
}
[b]int1array=(INT1_TYPE *)malloc(sizeof(INT1_TYPE)*ELEMENTS);
for (i=0; i<ELEMENTS;i++){
int1array[i]=(INT1_TYPE)rand();/*create random array*/
if (i%2==0)/*for every second item*/
int1array[i]=-int1array[i];/*make number negative*/
}
[/b]
int4array=(INT4_TYPE *)malloc(sizeof(INT4_TYPE)*ELEMENTS);
for (i=0; i<ELEMENTS;i++){
int4array[i]=(INT4_TYPE)rand();
}
uint1array=(UINT1_TYPE *)malloc(sizeof(UINT1_TYPE)*ELEMENTS);
for (i=0; i<ELEMENTS;i++){
uint1array[i]=(UINT1_TYPE)rand();
}
uint4array=(UINT4_TYPE *)malloc(sizeof(UINT4_TYPE)*ELEMENTS);
for (i=0; i<ELEMENTS;i++){
uint4array[i]=(UINT4_TYPE)rand();
}
[b]int1_CPU=(INT1_TYPE *)calloc(ELEMENTS,sizeof(INT1_TYPE));
int1_GPU=(INT1_TYPE *)calloc(ELEMENTS,sizeof(INT1_TYPE));[/b]
//for (i=0; i<ELEMENTS;i++){
// int1array[i]=(INT1_TYPE)rand()/rand())-5;
//}
[b]if (l_abs_gpu((void *)int1array,INT1_CODE, int1_GPU, ELEMENTS))/*l_abs_gpu returns 1 if int1array and int1GPU are the same pointer*/
printf("Error\n");
l_abs((void *)int1array,INT1_CODE, int1_CPU, ELEMENTS);[/b]
text_write("int1 array input 2nd try", INT1_CODE, int1array);
text_write("int1 abs for GPU", INT1_CODE, int1_GPU);
text_write("int1 abs for CPU", INT1_CODE, int1_CPU);
free(int1_CPU);
free(int1_GPU);
free(pixarray);
free(int1array);
free(int4array);
free(uint1array);
free(uint4array);
free(dblarray);
return 1;
}[/codebox]
Heres my kernel:
[codebox]#include <cuda.h>
#include <cuda_runtime_api.h>
#include <liarp.h>
#include “arith_const_type.h”
//prototypes
global void abs_ (void *in1, int code_in, long elements);
global void double_ (INT1_TYPE *data, int elements);
int l_abs_gpu(void in, / ptr to first input image */
int code_in, /* code of the input pixel type */
void *out, /* ptr to output image */
long nbpix) /* size of input & ouput in pixels */
{
INT1_TYPE *ci;
INT1_TYPE *dev_ci;
INT4_TYPE *ii, *dev_ii=NULL;
DBL_TYPE *di, *dev_di=NULL;
dim3 dimBlock(512,1 );
dim3 dimGrid(round_up(nbpix,dimBlock.x),1);
[b] if (code_in == INT1_CODE) {
ci = (INT1_TYPE *)in;
cudaMalloc((void **)&dev_ci, sizeof(INT1_TYPE)*nbpix);/*allocate memory in GPU*/
cudaMemcpy(dev_ci,ci,sizeof(INT1_TYPE)*nbpix,cudaMemcpyHostT
oDevice);/copy to GPU/
double_<<<dimGrid, dimBlock>>>(dev_ci, nbpix);
//abs_<<<dimGrid, dimBlock>>>(dev_ci, code_in, nbpix);/*get absolute value of elements*/
cudaMemcpy(out,dev_ci,sizeof(INT1_TYPE)*nbpix,cudaMemcpyDevi
ceToHost);/copy back to normal memory/
cudaFree(dev_ci);/*free cuda pointer*/
}[/b]
else if (code_in == INT4_CODE) {
ii = (INT4_TYPE *)in;
cudaMalloc((void **)&dev_ii, sizeof(INT4_TYPE)*nbpix);/*allocate memory in GPU*/
cudaMemcpy(dev_ii,ii,sizeof(INT4_TYPE)*nbpix,cudaMemcpyHostT
oDevice);/copy to GPU/
abs_<<<dimGrid, dimBlock>>>(dev_ii, code_in, nbpix);/*get absolute value of elements*/
cudaMemcpy(out,dev_ii,sizeof(INT4_TYPE)*nbpix,cudaMemcpyDevi
ceToHost);/copy back to normal memory/
cudaFree(dev_ii);/*free cuda pointer*/
}
else if(code_in == DBL_CODE) {
di = (DBL_TYPE *)in;
CUDA_SAFE_CALL(cudaMalloc((void **)&dev_di, sizeof(DBL_TYPE)*nbpix));/*allocate memory in GPU*/
CUT_CHECK_ERROR("cudaMalloc failed");
CUDA_SAFE_CALL(cudaMemcpy(dev_di,di,sizeof(DBL_TYPE)*nbpix,c
udaMemcpyHostToDevice));/copy to GPU/
CUT_CHECK_ERROR("cudaMemcpy failed");
abs_<<<dimGrid, dimBlock>>>(dev_di, code_in, nbpix);/*get absolute value of elements*/
CUDA_SAFE_CALL(cudaMemcpy(out,dev_di,sizeof(DBL_TYPE)*nbpix,
cudaMemcpyDeviceToHost));/copy back to normal memory/
CUT_CHECK_ERROR("cudaMemcpy failed");
cudaFree(dev_di);/*free cuda pointer*/
}
else {
LIARerror("Input type not supported\n");
return(1);
}
if (out==in)
return 1;
return 0;
}
global void abs_ (void *in1, int code_in, long elements){
INT1_TYPE *ci;/*same as char*/
INT4_TYPE *ii;/*same as int*/
DBL_TYPE *di;/*same as double*/
int i = blockIdx.x * blockDim.x + threadIdx.x;/*get thread number*/
[b] if (i<elements){/if less than elements/
if (code_in == INT1_CODE ) {
ci=(INT1_TYPE *)in1;
if (ci[i]<0)/*if negative number*/
ci[i]=-ci[i];/*times by minus one*/
//ci[i] = ci[i] >= 0 ? ci[i] : -ci[i];
} [/b]
else if (code_in == INT4_CODE) {
ii = (INT4_TYPE *)in1;
ii[i] = ii[i] >= 0 ? ii[i] : -ii[i];
}
else if (code_in == DBL_CODE) {
di = (DBL_TYPE *)in1;
di[i] = di[i] >= 0 ? di[i] : -di[i];
}
}
}
[b]/this is an alternate function to abs that i created that should double the values/
global void double_ (INT1_TYPE *data, int elements){
int i = blockIdx.x * blockDim.x + threadIdx.x;/*get thread no*/
if (i<elements){
data[i]=2*data[i];
}
}[/b][/codebox]
i doubt you will need it but the text_write function is as follows:
[codebox]void text_write(const char *msg, int type, void *data){
FILE *file=NULL;
PIX_TYPE *pixarray=NULL;
INT1_TYPE *int1array=NULL;
INT4_TYPE *int4array=NULL;
UINT1_TYPE *uint1array=NULL;
UINT4_TYPE *uint4array=NULL;
DBL_TYPE *dblarray=NULL;
int i=0;
file=fopen("test.txt","a");
fprintf(file, msg);/*type in the message as required*/
switch (type){
case PIX_CODE:
for (i=0; i<100;i++){
if (i%10==0)
fprintf(file,"\n"); /*insert new line every 10 lines*/
fprintf(file,"%d\t",((PIX_TYPE *)data)[i]);
}
break;
case INT1_CODE:
for (i=0; i<100;i++){
if (i%10==0)
fprintf(file,"\n"); /*insert new line every 10 lines*/
fprintf(file,"%d\t",((INT1_TYPE *)data)[i]);
}
break;
case INT4_CODE:
for (i=0; i<100;i++){
if (i%10==0)
fprintf(file,"\n"); /*insert new line every 10 lines*/
fprintf(file,"%d\t",((INT4_TYPE *)data)[i]);
}
break;
//case UINT1_CODE:
// for (i=0; i<100;i++){
// if (i%10==0)
// fprintf(file,"\n"); /*insert new line every 10 lines*/
// fprintf(file,"%d\t",((UINT1_TYPE *)data)[i]);
// }
// break;
case UINT4_CODE:
for (i=0; i<100;i++){
if (i%10==0)
fprintf(file,"\n"); /*insert new line every 10 lines*/
fprintf(file,"%d\t",((UINT4_TYPE *)data)[i]);
}
break;
case DBL_CODE:
for (i=0; i<100;i++){
if (i%10==0)
fprintf(file,"\n"); /*insert new line every 10 lines*/
fprintf(file,"%.4g\t",((DBL_TYPE *)data)[i]);
}
break;
default:
printf("Error:wrong code\n");
break;
}
fprintf(file,"\n"); /*insert new line*/
fclose(file);
}[/codebox]
and my errors are
[codebox]‘LIAR.exe’: Loaded ‘C:\Documents and Settings\abe044\My Documents\Visual Studio 2005\liar_arith\LIAR\win32\LIAR\Debug\LIAR.exe’, Symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\ntdll.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\kernel32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\CUDA\bin\cudart.dll’, Binary was not built with debug information.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\user32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\gdi32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\shimeng.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\imm32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\advapi32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\rpcrt4.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\secur32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\lpk.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\usp10.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\msvcrt.dll’, No symbols loaded.
‘LIAR.exe’: Unloaded ‘C:\WINDOWS\system32\shimeng.dll’
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\nvcuda.dll’, Binary was not built with debug information.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\nvapi.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\ole32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\oleaut32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\shlwapi.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\shell32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\setupapi.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\version.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\WinSxS\x86_Microsoft.Windows.Common-Controls_6595b64144ccf1df_6.0.2600.2982_x-ww_ac3f9c03\comctl32.dll’, No symbols loaded.
‘LIAR.exe’: Loaded ‘C:\WINDOWS\system32\comctl32.dll’, No symbols loaded.
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError_enum at memory location 0x0013fcfc…
First-chance exception at 0x7c812a6b in LIAR.exe: Microsoft C++ exception: cudaError at memory location 0x0013fd4c…
[/codebox]
sry about the length of this post but im really desperate for some answers