Hi,
I’m new and have no experience with cuda. I try to parallelize the calculation of euclidian distance (sum((x_i -y_i)^2)) of two float vectors (called fFeature1 & fFeature2).
I split it in 3 tasks: substract, square and sum_reduction:
__global__ void SubstractAndSquare(float* fFeature1, float* fFeature2, float* fTarget) {
int i = threadIdx.x;
fTarget[i] = fFeature1[i] - fFeature2[i];
fTarget[i] = fTarget[i] * fTarget[i];
}
void Euclid_dist_gpu(float* fFeature1, float* fFeature2, int iFeatureLen1, float* fDist) {
float* fTarget = NULL;
float* devFeature1 = NULL;
float* devFeature2 = NULL;
dim3 dimBlock(iFeatureLen1);
CudaSafeCall( cudaMalloc((void**) &fTarget, sizeof(float) * iFeatureLen1));
CudaSafeCall( cudaMalloc((void**) &devFeature1, sizeof(float) * iFeatureLen1));
CudaSafeCall( cudaMalloc((void**) &devFeature2, sizeof(float) * iFeatureLen1));
CudaSafeCall( cudaMemcpy(devFeature1, fFeature1, sizeof(float) * iFeatureLen1, cudaMemcpyHostToDevice));
CudaSafeCall( cudaMemcpy(devFeature2, fFeature2, sizeof(float) * iFeatureLen1, cudaMemcpyHostToDevice));
SubstractAndSquare<<<1, dimBlock>>>(devFeature1, devFeature2, fTarget);
//sum_reduction(fTarget) with sdk example
}
Unfortunatly I can’t test my parallelization of SubstractAndSquare because the vector devFeature1 and devFeature2 are still initialized to zero after cudaMemcpy.
What am I doing wrong??
CudaSafeCall is a functional implementation of the macro CUDA_SAFE_CALL and it returns no errors:
cudaError CudaSafeCallNoSync(cudaError iCudaRC) {
Singleton* pExtProcSession = Singleton::getInstance();
char cMessage[512];
cudaError err = iCudaRC;
if( cudaSuccess != err) {
sprintf(cMessage, "Cuda error in file '%s' in line %i : %s.\n", __FILE__, __LINE__, cudaGetErrorString( err) );
strcat(pExtProcSession->pLog->cErrorMessage, cMessage);
}
return iCudaRC;
}
cudaError CudaSafeCall(cudaError iCudaRC) {
Singleton* pExtProcSession = Singleton::getInstance();
char cMessage[512];
CudaSafeCallNoSync(iCudaRC);
cudaError err = cudaThreadSynchronize();
if( cudaSuccess != err) {
sprintf(cMessage, "Cuda error in file '%s' in line %i : %s.\n", __FILE__, __LINE__, cudaGetErrorString( err) );
strcat(pExtProcSession->pLog->cErrorMessage, cMessage);
}
return iCudaRC;
}
Thanks in advance.