I am using CUDA 6.5 with cublas and I have been trying to get the cublasDgelsBatched function to work. However, when I call it, all I get is a segmentation fault. cuda-gdb and cuda-memcheck are not helping me resolve the issue. I have a feeling that the issue has something to do with the array of pointers, but I can’t seem to figure it out. I have consulted other batched code posts here.
http://stackoverflow.com/questions/22887167/cublas-incorrect-inversion-for-matrix-with-zero-pivot
But none of the code examples here seem to be successful. Could somebody provide me with example usage for this function? Here is the code I am attempting to use. ls_matrix->data and solutions->data are just of data type double*.
double* cuda_test_batched_ls(matrix* ls_matrix, matrix* solutions, int batch_size){
double *A[] = {ls_matrix->data};
double** A_d;
gpu_error_check(cudaMalloc<double*>(&A_d, sizeof(A)));
gpu_error_check(cudaMemcpy(A_d, A, sizeof(A), cudaMemcpyHostToDevice));
double *C[] = {solutions->data};
double** C_d;
gpu_error_check(cudaMalloc<double*>(&C_d, sizeof(C)));
gpu_error_check(cudaMemcpy(C_d, C, sizeof(C), cudaMemcpyHostToDevice));
cublasStatus_t status;
cublasHandle_t handle;
int* cublas_error_info = 0;
status = cublasCreate_v2(&handle);
if (status != CUBLAS_STATUS_SUCCESS){
puts(cublas_get_error_string(status));
}
status = cublasDgelsBatched(handle, CUBLAS_OP_N, ls_matrix->rows, ls_matrix->columns, 1, A_d, 3, C_d, 3, cublas_error_info, NULL, 1);
if (status != CUBLAS_STATUS_SUCCESS){
puts(cublas_get_error_string(status));
}
gpu_error_check(cudaMalloc<double*>(&C_d, sizeof(C)));
gpu_error_check(cudaMemcpy(C, C_d, sizeof(C), cudaMemcpyDeviceToHost));
return C[0];
}