CUBLAS Initialization Error When Compiling in Emulation Mode

Hi,

I am new to CUDA and to start with I am trying to execute a simple CUBLAS program given in the SDK. The code simpleCUBLAS.c is given below for your reference.

When I compile the code with: nvcc -deviceemu -lcublas simpleCUBLAS.c -o output and run the resulting program, I get the following message:

simpleCUBLAS test running..

!!!! CUBLAS initialization error

Is this happening because I don’t have a GPU? If so, why does emulation fail here, even though I can run many other CUDA programs in emulation mode?

I would be grateful for your replies.

Thanks,

Geetansh

//simpleCUBLAS.c

/* Includes, system */

#include <math.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

/* Includes, cuda */

#include "cublas.h"

/* Matrix size */

#define N  (275)

/* Host implementation of a simple version of sgemm */

/*
 * Host-side reference matrix multiply used to validate the CUBLAS result.
 *
 * Updates C in place with alpha * (A * B) + beta * C for square n-by-n
 * operands. Element (row, col) of every matrix lives at index
 * [col * n + row], i.e. consecutive entries of one column are adjacent.
 *
 *   n      number of rows and columns of A, B and C
 *   alpha  scale applied to the product A * B
 *   A, B   input matrices (n * n floats each, read only)
 *   beta   scale applied to the previous contents of C
 *   C      input/output matrix (n * n floats)
 */
static void simple_sgemm(int n, float alpha, const float *A, const float *B,
						 float beta, float *C)
{
	int row, col, inner;

	for (row = 0; row < n; ++row) {
		for (col = 0; col < n; ++col) {
			/* Column `col` of B and the destination cell C(row, col). */
			const float *b_col = B + col * n;
			float *out = C + col * n + row;
			float acc = 0;

			/* Dot product of row `row` of A with column `col` of B. */
			for (inner = 0; inner < n; ++inner) {
				acc += A[inner * n + row] * b_col[inner];
			}

			*out = alpha * acc + beta * *out;
		}
	}
}

/* Main */

/*
 * Entry point of the simpleCUBLAS test.
 *
 * Fills three N x N matrices with random values, computes
 * alpha * A * B + beta * C once on the host with simple_sgemm() and once with
 * cublasSgemm(), and prints "Test PASSED" when the relative L2 error between
 * the two results is below 1e-6, "Test FAILED" otherwise.
 *
 * Returns EXIT_SUCCESS when every step (including clean up and shutdown)
 * completed, regardless of the PASSED/FAILED verdict, and EXIT_FAILURE as
 * soon as any allocation, transfer, CUBLAS call or clean-up step fails.
 * Every failure after a successful cublasInit() goes through one common
 * clean-up path, so host buffers, device buffers and the CUBLAS library are
 * released on error paths too.
 */
int main()
{
	cublasStatus status;
	/* Host pointers start out null so the clean-up path can free them
	   unconditionally (free(NULL) is a no-op). */
	float* h_A = 0;
	float* h_B = 0;
	float* h_C = 0;
	float* h_C_ref = 0;
	float* d_A = 0;
	float* d_B = 0;
	float* d_C = 0;
	float alpha = 1.0f;
	float beta = 0.0f;
	int n2 = N * N;
	int i;
	float error_norm;
	float ref_norm;
	float diff;
	/* Pessimistic default: only flipped once the comparison has been printed. */
	int exit_code = EXIT_FAILURE;

	/* Initialize CUBLAS */
	printf("simpleCUBLAS test running..\n");
	status = cublasInit();
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! CUBLAS initialization error\n");
		/* Nothing has been acquired yet, so there is nothing to release. */
		return EXIT_FAILURE;
	}

	/* Allocate host memory for the matrices */
	h_A = (float*)malloc(n2 * sizeof(h_A[0]));
	if (h_A == 0) {
		fprintf (stderr, "!!!! host memory allocation error (A)\n");
		goto cleanup;
	}
	h_B = (float*)malloc(n2 * sizeof(h_B[0]));
	if (h_B == 0) {
		fprintf (stderr, "!!!! host memory allocation error (B)\n");
		goto cleanup;
	}
	h_C = (float*)malloc(n2 * sizeof(h_C[0]));
	if (h_C == 0) {
		fprintf (stderr, "!!!! host memory allocation error (C)\n");
		goto cleanup;
	}

	/* Fill the matrices with test data */
	for (i = 0; i < n2; i++) {
		h_A[i] = rand() / (float)RAND_MAX;
		h_B[i] = rand() / (float)RAND_MAX;
		h_C[i] = rand() / (float)RAND_MAX;
	}

	/* Allocate device memory for the matrices */
	status = cublasAlloc(n2, sizeof(d_A[0]), (void**)&d_A);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device memory allocation error (A)\n");
		goto cleanup;
	}
	status = cublasAlloc(n2, sizeof(d_B[0]), (void**)&d_B);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device memory allocation error (B)\n");
		goto cleanup;
	}
	status = cublasAlloc(n2, sizeof(d_C[0]), (void**)&d_C);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device memory allocation error (C)\n");
		goto cleanup;
	}

	/* Initialize the device matrices with the host matrices */
	status = cublasSetVector(n2, sizeof(h_A[0]), h_A, 1, d_A, 1);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device access error (write A)\n");
		goto cleanup;
	}
	status = cublasSetVector(n2, sizeof(h_B[0]), h_B, 1, d_B, 1);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device access error (write B)\n");
		goto cleanup;
	}
	status = cublasSetVector(n2, sizeof(h_C[0]), h_C, 1, d_C, 1);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device access error (write C)\n");
		goto cleanup;
	}

	/* Performs operation using plain C code */
	simple_sgemm(N, alpha, h_A, h_B, beta, h_C);
	/* The host result buffer becomes the reference. Ownership moves to
	   h_C_ref; h_C is cleared so the clean-up path cannot free the same
	   buffer twice if a later step fails. */
	h_C_ref = h_C;
	h_C = 0;

	/* Clear last error */
	cublasGetError();

	/* Performs operation using cublas */
	cublasSgemm('n', 'n', N, N, N, alpha, d_A, N, d_B, N, beta, d_C, N);
	status = cublasGetError();
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! kernel execution error.\n");
		goto cleanup;
	}

	/* Allocate host memory for reading back the result from device memory */
	h_C = (float*)malloc(n2 * sizeof(h_C[0]));
	if (h_C == 0) {
		fprintf (stderr, "!!!! host memory allocation error (C)\n");
		goto cleanup;
	}

	/* Read the result back */
	status = cublasGetVector(n2, sizeof(h_C[0]), d_C, 1, h_C, 1);
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! device access error (read C)\n");
		goto cleanup;
	}

	/* Check result against reference */
	error_norm = 0;
	ref_norm = 0;
	for (i = 0; i < n2; ++i) {
		diff = h_C_ref[i] - h_C[i];
		error_norm += diff * diff;
		ref_norm += h_C_ref[i] * h_C_ref[i];
	}
	error_norm = (float)sqrt((double)error_norm);
	ref_norm = (float)sqrt((double)ref_norm);
	/* Guard the division below against a (near) zero reference norm. */
	if (fabs(ref_norm) < 1e-7) {
		fprintf (stderr, "!!!! reference norm is 0\n");
		goto cleanup;
	}
	printf( "Test %s\n", (error_norm / ref_norm < 1e-6f) ? "PASSED" : "FAILED");

	/* The run itself completed; clean-up failures below may still veto this. */
	exit_code = EXIT_SUCCESS;

cleanup:
	/* Memory clean up */
	free(h_A);
	free(h_B);
	free(h_C);
	free(h_C_ref);

	/* Device buffers are released only if their allocation succeeded. */
	if (d_A != 0) {
		status = cublasFree(d_A);
		if (status != CUBLAS_STATUS_SUCCESS) {
			fprintf (stderr, "!!!! memory free error (A)\n");
			exit_code = EXIT_FAILURE;
		}
	}
	if (d_B != 0) {
		status = cublasFree(d_B);
		if (status != CUBLAS_STATUS_SUCCESS) {
			fprintf (stderr, "!!!! memory free error (B)\n");
			exit_code = EXIT_FAILURE;
		}
	}
	if (d_C != 0) {
		status = cublasFree(d_C);
		if (status != CUBLAS_STATUS_SUCCESS) {
			fprintf (stderr, "!!!! memory free error (C)\n");
			exit_code = EXIT_FAILURE;
		}
	}

	/* Shutdown */
	status = cublasShutdown();
	if (status != CUBLAS_STATUS_SUCCESS) {
		fprintf (stderr, "!!!! shutdown error (A)\n");
		exit_code = EXIT_FAILURE;
	}

	return exit_code;
}

You have to link with the emulation enabled version of cublas, at least on linux it is called libcublasemu.so, so replace -lcublas with -lcublasemu.

Thank you very much. This solved my problem!!

I have the same problem (!!!! CUBLAS initialization error). I am trying the same program in emulation mode in Visual Studio 2008. I have configured the project for CUBLAS and there are no compilation errors, but at run time it reports the initialization error. Please tell me how to make it work in emulation mode in Visual Studio 2008.

Hello all.

I have the same problem when I compile the simpleCUBLAS example under Linux (CentOS 5.2) with CUDA 3.0.

I use these commands to compile the example:

nvcc ./simpleCUBLAS.c -o simpleCUBLAS -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudartemu -lcublasemu -deviceemu

and

nvcc ./simpleCUBLAS.c -o simpleCUBLAS -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudartemu -lcublasemu

All dependencies are resolved correctly:

libcudartemu.so.3 => /usr/local/cuda/lib64/libcudartemu.so.3 (0x00002ab850e86000)

libcublasemu.so.3 => /usr/local/cuda/lib64/libcublasemu.so.3 (0x00002ab8510c1000)

But still it gives

simpleCUBLAS test running..

!!!! CUBLAS initialization error