I’m using CUDA 9.0 and trying to get cusparseCsrmvEx working for load-balanced matrix-vector multiplication, but it fails at the matrix-vector multiplication step. Here is what I have:

```
size_t workInBytes = 0;
status = cusparseCsrmvEx_bufferSize(handle,
CUSPARSE_ALG1,
CUSPARSE_OPERATION_NON_TRANSPOSE,
N, input_size, nnz,
&s_one, CUDA_R_32F,
descr,
AA_devptr, CUDA_R_32F,
IA_devptr,
JA_devptr,
input_vec_imag_devptr, CUDA_R_32F,
&s_zero, CUDA_R_32F,
&s_zero, CUDA_R_32F,
CUDA_R_32F,
&workInBytes);
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("buffer calculation failed");
return 1;
}
status = cusparseCsrmvEx(handle,
CUSPARSE_ALG1,
CUSPARSE_OPERATION_NON_TRANSPOSE,
N, input_size, nnz,
&s_one, CUDA_R_32F,
descr,
AA_devptr, CUDA_R_32F,
IA_devptr,
JA_devptr,
input_vec_imag_devptr, CUDA_R_32F,
&s_zero, CUDA_R_32F,
&s_zero, CUDA_R_32F,
CUDA_R_32F,
&workInBytes);
if (status != CUSPARSE_STATUS_SUCCESS) {
CLEANUP("Matrix-vector multiplication failed");
return 1;
}
```

Am I using the function incorrectly?