Hello,
I ve written a code to do solution of system of linear equation, and the first part of it is to do LU decomposition using CUBLAS getrfbatched(). This CUBLAS call is completing properly, but when i try to copy the LU decomposed matrix back to host using cudamemcpy(), its throwing an error:Illegal memory access was encountered. Please help me on this… Attaching the relevant part of the code below
////////////////////***/////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
//generating and initalizing the required vectors in the host
PivotArray = (int *) malloc(Size * sizeof(int));
//*********Starting CUDA Profiler*****////////
// cudaProfilerStart();
//allocation of device memory
CUDA_SAFE_CALL(cudaMalloc( (void**)&devinfoArray, sizeof(int)));
CUDA_SAFE_CALL(cudaMalloc( (void**)&devPivotArray,Size * sizeof(int)));
CUDA_SAFE_CALL(cudaMalloc( (void**)&devCoeffMat,Size * Size * sizeof(double)));
//copying host to device
CUDA_SAFE_CALL(cudaMemcpy((void*)devCoeffMat, (void*)gmat, Size*Size*sizeof(double) , cudaMemcpyHostToDevice ));
//recording start time
gettimeofday(&tv, NULL);
double t1=tv.tv_sec+(tv.tv_usec/1000000.0);
cudastat = cublasDgetrfBatched(handle,Size,&devCoeffMat,Size,devPivotArray,devinfoArray,1);
if (cudastat != CUBLAS_STATUS_SUCCESS) fprintf(stderr,"error in getting LU decomposition : %i\n",cudastat);
else printf("LU success \n");
CUDA_SAFE_CALL(cudaMemcpy(gmat,devCoeffMat, Size*Size*sizeof(double), cudaMemcpyDeviceToHost)); // This is where error is shown
CUDA_SAFE_CALL(cudaMemcpy((void*)infoArray, (void*)devinfoArray, sizeof(int) , cudaMemcpyDeviceToHost)); // This is where error is shown
CUDA_SAFE_CALL(cudaMemcpy((void*)PivotArray,(void*)devPivotArray, Size*sizeof(int) , cudaMemcpyDeviceToHost)); // This is where error is shown
//
printf(“Printing LU decomposed Matrix \n”);
for(i=0;i<Size;i++)
{
for(j=0;j<Size;j++)
{
printf(“%f “,gmat[i*Size+j]);
}
printf(”\n”);
}
//recording stop time
gettimeofday(&tv,NULL);
double t2=tv.tv_sec+(tv.tv_usec/1000000.0);
//calculating time taken for computation
timing = t2 - t1;
// printf(“\n The solution Vector V is \n”);
// for(i=0; i<Size; i++)
// {
// printf(" %f \n",solutionVect[i]);
// }
cudaFree(devCoeffMat);
cudaFree(devinfoArray);
cudaFree(devPivotArray);
I ve commented on the cudamemcpy lines where it is throwing error.
Thanks again.
Jeebu