Hi all,
First, I should mention that I am pretty new to CUDA programming. I get a segmentation fault, and when I debug it the debugger says: "No source available for “cudaProfilerInitialize() at 0x4520a0”". What does this mean exactly? I think the memory allocation and parameter settings are correct, because I used the same approach in earlier steps of the code and had no problems at all. Below is the part of the code where the segmentation fault starts to show up. Thanks in advance!
// Copies the SVD outputs dVsvd / dSsvd back to the host, uploads a K x N block
// of hVsvd into dV0, round-trips it through hV0 into temp, and finally writes
// alpha*temp^T + beta*temp^T into dV0t with cublasDgeam.
//
// All byte counts are computed in size_t: `N * N * sizeof(double)` multiplies
// N * N in the operands' own type first, which overflows if they are int.
double *hVsvd = (double *)malloc(static_cast<size_t>(N) * N * sizeof(double));
double *hSsvd = (double *)malloc(static_cast<size_t>(N) * sizeof(double));
// A failed malloc would otherwise be handed straight to cudaMemcpy /
// cublasSetMatrix as a NULL host pointer.
if (hVsvd == NULL || hSsvd == NULL) {
    fprintf (stderr, "!!!! host memory allocation error (hVsvd/hSsvd)\n");
    exit(EXIT_FAILURE);
}
gpuErrchk(cudaMemcpy(hVsvd, dVsvd, static_cast<size_t>(N) * N * sizeof(double), cudaMemcpyDeviceToHost));
// Only min(M,N) values are copied; hSsvd holds N doubles, so the copy always fits.
gpuErrchk(cudaMemcpy(hSsvd, dSsvd, static_cast<size_t>(std::min(M,N)) * sizeof(double), cudaMemcpyDeviceToHost));
std::cout << "fdsssfd\n" << std::flush;
// for(int i=0; i <N; i++)
// std::cout << hVsvd[i] << std::endl;

// Upload K rows x N columns of hVsvd (source leading dimension N) into dV0
// (destination leading dimension K).
// NOTE(review): cuBLAS requires lda >= rows, i.e. K <= N here - confirm for the caller's sizes.
double* dV0;
gpuErrchk(cudaMalloc((void**)&dV0, static_cast<size_t>(K) * N * sizeof(double)));
statusCublas = cublasSetMatrix(K, N, sizeof(double), hVsvd, N, dV0, K);
if (statusCublas != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! cublasSetMatrix failed (hVsvd -> dV0)\n");
    exit(EXIT_FAILURE);
}
printf("cublassetmatrix: %s\n",cudaGetErrorString(cudaGetLastError()));
// cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its documented replacement.
printf("Sync-Threads : %s\n",cudaGetErrorString(cudaDeviceSynchronize()));
printf("Sync-Device: %s\n", cudaGetErrorString(cudaDeviceSynchronize()));

double *hV0 = (double *)malloc(static_cast<size_t>(K) * N * sizeof(double));
if (hV0 == NULL) {
    fprintf (stderr, "!!!! host memory allocation error (hV0)\n");
    exit(EXIT_FAILURE);
}
double* dV0t;
gpuErrchk(cudaMalloc((void**)&dV0t, static_cast<size_t>(N) * K * sizeof(double)));
double* temp;
gpuErrchk(cudaMalloc((void**)&temp, static_cast<size_t>(K) * N * sizeof(double)));

// Round trip: dV0 -> hV0 (host copy) -> temp (device), both stored K x N with leading dimension K.
gpuErrchk(cudaMemcpy(hV0, dV0, static_cast<size_t>(K) * N * sizeof(double), cudaMemcpyDeviceToHost));
statusCublas = cublasSetMatrix(K, N, sizeof(double), hV0, K, temp, K);
// Check the status that was just assigned; the original tested an unrelated
// `status` variable, so a failure of this upload went unnoticed.
if (statusCublas != CUBLAS_STATUS_SUCCESS) {
    fprintf (stderr, "!!!! cublasSetMatrix failed (hV0 -> temp)\n");
    exit(EXIT_FAILURE);
}
printf("cublassetmatrix: %s\n",cudaGetErrorString(cudaGetLastError()));
printf("Sync-Threads : %s\n",cudaGetErrorString(cudaDeviceSynchronize()));
printf("Sync-Device: %s\n", cudaGetErrorString(cudaDeviceSynchronize()));

// dV0t (N x K, ldc = N) = alpha * temp^T + beta * temp^T, with temp stored K x N (lda = ldb = K).
// NOTE(review): this is a plain transpose only when alpha + beta == 1 (presumably
// alpha = 1, beta = 0); alpha and beta must be doubles for cublasDgeam - confirm at their definition.
CublasSafeCall(cublasDgeam( handle, CUBLAS_OP_T, CUBLAS_OP_T, N, K, &alpha, temp, K, &beta, temp, K, dV0t, N));
printf("TRANSPOSE: %s\n",cudaGetErrorString(cudaGetLastError()));
printf("Sync-Threads : %s\n",cudaGetErrorString(cudaDeviceSynchronize()));
printf("Sync-Device: %s\n", cudaGetErrorString(cudaDeviceSynchronize()));