Seg Fault / No source available for "cudaProfilerInitialize()"

Hi all,

First, I should mention that I am pretty new to CUDA programming. I get a segmentation fault, and when I debug it the debugger says: No source available for "cudaProfilerInitialize() at 0x4520a0". What does this mean exactly? I think the memory allocation and parameter settings are correct, because I used the same approach in the previous steps of the code and had no problems at all. Below is the part of the code where the seg fault starts to show up. Thanks in advance!

// ---------------------------------------------------------------------------
// Copy the SVD results back to the host, upload a K x N sub-block of V to the
// device, and form its N x K transpose with cublasDgeam.
//
// Relies on names declared outside this fragment: N, M, K, dVsvd, dSsvd,
// statusCublas, handle, alpha, beta, gpuErrchk, CublasSafeCall.
// NOTE(review): assumes K <= N (cublasSetMatrix needs lda >= rows) and that
// alpha/beta are doubles set so that alpha*A^T + beta*A^T yields the plain
// transpose (e.g. alpha = 1, beta = 0) -- confirm against the caller.
// ---------------------------------------------------------------------------

// Host staging buffers for the SVD outputs.
double *hVsvd = (double *)malloc(N * N * sizeof(double));
double *hSsvd = (double *)malloc(N * sizeof(double));

// malloc returns NULL on failure; stop here instead of handing a null
// pointer to the copies below.
if (hVsvd == NULL || hSsvd == NULL) {
    fprintf(stderr, "!!!! host memory allocation error (hVsvd/hSsvd)\n");
    exit(EXIT_FAILURE);
}

gpuErrchk(cudaMemcpy(hVsvd, dVsvd, N * N * sizeof(double), cudaMemcpyDeviceToHost));
// Only min(M,N) values are copied; hSsvd holds N doubles, so this always fits.
gpuErrchk(cudaMemcpy(hSsvd, dSsvd, std::min(M, N) * sizeof(double), cudaMemcpyDeviceToHost));

// Debug progress marker.
std::cout << "fdsssfd\n" << std::flush;

// for(int i=0; i <N; i++)
//	std::cout << hVsvd[i] << std::endl;

// Device buffer for the K x N block taken from hVsvd.
double *dV0;
gpuErrchk(cudaMalloc((void **)&dV0, K * N * sizeof(double)));

// Column-major copy: K rows x N columns, read from hVsvd with leading
// dimension N and written to dV0 with leading dimension K.
statusCublas = cublasSetMatrix(K, N, sizeof(double), hVsvd, N, dV0, K);

if (statusCublas != CUBLAS_STATUS_SUCCESS) {
    // This is a transfer failure, not an allocation failure.
    fprintf(stderr, "!!!! cublasSetMatrix error (hVsvd -> dV0), status %d\n", (int)statusCublas);
    exit(EXIT_FAILURE);
}

// NOTE(review): cudaThreadSynchronize() is deprecated in favor of
// cudaDeviceSynchronize() per the CUDA Runtime API docs; kept so the
// diagnostic output is unchanged.
printf("cublassetmatrix: %s\n", cudaGetErrorString(cudaGetLastError()));
printf("Sync-Threads : %s\n", cudaGetErrorString(cudaThreadSynchronize()));
printf("Sync-Device: %s\n", cudaGetErrorString(cudaDeviceSynchronize()));

// Host copy of the K x N block.
double *hV0 = (double *)malloc(K * N * sizeof(double));
if (hV0 == NULL) {
    fprintf(stderr, "!!!! host memory allocation error (hV0)\n");
    exit(EXIT_FAILURE);
}

// Destination of the transpose (N x K).
double *dV0t;
gpuErrchk(cudaMalloc((void **)&dV0t, N * K * sizeof(double)));

// Device scratch copy of the K x N block, used as the cublasDgeam input.
double *temp;
gpuErrchk(cudaMalloc((void **)&temp, K * N * sizeof(double)));

gpuErrchk(cudaMemcpy(hV0, dV0, K * N * sizeof(double), cudaMemcpyDeviceToHost));

statusCublas = cublasSetMatrix(K, N, sizeof(double), hV0, K, temp, K);

// Fix: test the status variable that was just assigned (the original
// checked `status`, so a failure of this call went unnoticed).
if (statusCublas != CUBLAS_STATUS_SUCCESS) {
    fprintf(stderr, "!!!! cublasSetMatrix error (hV0 -> temp), status %d\n", (int)statusCublas);
    exit(EXIT_FAILURE);
}
printf("cublassetmatrix: %s\n", cudaGetErrorString(cudaGetLastError()));
printf("Sync-Threads : %s\n", cudaGetErrorString(cudaThreadSynchronize()));
printf("Sync-Device: %s\n", cudaGetErrorString(cudaDeviceSynchronize()));

// dV0t (N x K, ldc = N) = alpha * temp^T + beta * temp^T, where temp is
// K x N with leading dimension K.
CublasSafeCall(cublasDgeam(handle, CUBLAS_OP_T, CUBLAS_OP_T, N, K, &alpha, temp, K, &beta, temp, K, dV0t, N));

printf("TRANSPOSE: %s\n", cudaGetErrorString(cudaGetLastError()));
printf("Sync-Threads : %s\n", cudaGetErrorString(cudaThreadSynchronize()));
printf("Sync-Device: %s\n", cudaGetErrorString(cudaDeviceSynchronize()));