I am trying to run the introductory cuBLAS example program:
[b]#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cublas.h>
#define IDX2C(i,j,ld) (((j)*(ld))+(i))
/*
 * Scale part of a column-major matrix in place using cublasSscal.
 *
 * Starting at element (p, q) (0-based row, column):
 *   - the rest of row p (columns q .. n-1) is multiplied by alpha;
 *   - the rest of column q (rows p .. ldm-1) is multiplied by beta.
 * Element (p, q) itself belongs to both ranges, so it ends up multiplied
 * by alpha * beta.
 *
 * m     : matrix storage, indexed through IDX2C; it is handed straight to
 *         cublasSscal, so it must be memory that cuBLAS can operate on
 *         (the caller in this file passes a cublasAlloc'ed pointer).
 * ldm   : leading dimension of m; this routine also uses it as the row
 *         count, i.e. it assumes the matrix has exactly ldm rows.
 * n     : number of columns of the matrix.
 * p, q  : 0-based row and column of the starting element.
 * alpha : factor applied along row p.
 * beta  : factor applied down column q.
 */
void modify (float *m, int ldm, int n, int p, int q, float alpha, float beta)
{
    /* Row p: consecutive elements of a row are ldm apart in column-major
     * storage, and only n - q columns remain from column q onwards.
     * (Using n - p here walks past the last column whenever p < q.) */
    cublasSscal (n - q, alpha, &m[IDX2C(p, q, ldm)], ldm);

    /* Column q: elements of a column are contiguous; ldm - p rows remain. */
    cublasSscal (ldm - p, beta, &m[IDX2C(p, q, ldm)], 1);
}
/* Dimensions of the example matrix (rows x columns). Guarded so that a
 * definition supplied elsewhere (e.g. -DM=... on the command line) wins. */
#ifndef M
#define M 6
#endif
#ifndef N
#define N 5
#endif

/*
 * Builds an M x N column-major matrix on the host, copies it to the device
 * with the legacy cuBLAS helper API, scales part of it via modify(), copies
 * it back and prints it one column per output line.
 *
 * Returns 0 on success and 1 if any allocation, initialization or transfer
 * step fails. Every failure path releases whatever was acquired before it.
 */
int main(int argc, char** argv)
{
    int i, j;
    cublasStatus stat;
    float* devPtrA = 0;   /* device copy of the matrix */
    float* a = 0;         /* host copy of the matrix */

    (void)argc;
    (void)argv;

    /* sizeof(*a) is the element size; sizeof(a) would be the pointer size. */
    a = (float*)malloc(M * N * sizeof(*a));
    if (!a) {
        printf("host memory allocation failed");
        return 1;
    }

    /* Fill in column-major order: element (i, j) lives at IDX2C(i, j, M). */
    for (j = 0; j < N; j++) {
        for (i = 0; i < M; i++) {
            a[IDX2C(i, j, M)] = (float)(i * M + j + 1);
        }
    }

    /* A failed cublasInit makes every later cuBLAS call fail, so check it
     * here instead of discovering it as an allocation error. */
    stat = cublasInit();
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("CUBLAS initialization failed");
        free(a);
        return 1;
    }

    /* cublasAlloc takes the element count, the element size, and the
     * address of the device pointer to fill in. */
    stat = cublasAlloc(M * N, sizeof(*a), (void**)&devPtrA);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("device memory allocation failed");
        cublasShutdown();
        free(a);
        return 1;
    }

    /* Host -> device. */
    stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("copying the matrix to the device failed");
        cublasFree(devPtrA);
        cublasShutdown();
        free(a);
        return 1;
    }

    modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f);

    /* Device -> host. */
    stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M);
    if (stat != CUBLAS_STATUS_SUCCESS) {
        printf("copying the matrix back from the device failed");
        cublasFree(devPtrA);
        cublasShutdown();
        free(a);
        return 1;
    }

    cublasFree(devPtrA);
    cublasShutdown();

    /* One output line per matrix column (outer loop runs over j). */
    for (j = 0; j < N; j++) {
        for (i = 0; i < M; i++) {
            printf("%7.0f", a[IDX2C(i, j, M)]);
        }
        printf("\n");
    }

    free(a);
    return 0;
}
[/b]
I am compiling it with the command:
nvcc -deviceemu -o modify modify.cu -lcublas
When I run the program, I get the following error:
device memory not allocated…
Any suggestions?