The following is the code I am using for testing CUBLAS. However, i found the CUBLAS always take 0 ms no matter what the array size. :blink: Is it correct or is it some mistake in my code? :wacko:
/* Includes, system */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Includes, cuda */
#include “cublas.h”
/* Main /
int main(int argc, char* argv)
{
cublasStatus status;
float* h_A;
float* h_B;
float* h_C;
float* d_A = 0;
float* d_B = 0;
float* d_C = 0;
float alpha = 1.0f;
float beta = 0.0f;
int n2;
int i, size; /* Matrix size */
clock_t time_ffsum_start, time_ffsum_end ;
double looptime_ffsum ;
size = 4000;
n2 = size * size;
/* Initialize CUBLAS */
printf("simpleCUBLAS test running..\n");
status = cublasInit();
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! CUBLAS initialization error\n");
return EXIT_FAILURE;
}
/* Allocate host memory for the matrices */
h_A = (float*)malloc(n2 * sizeof(h_A[0]));
if (h_A == 0) {
fprintf (stderr, "!!!! host memory allocation error (A)\n");
return EXIT_FAILURE;
}
h_B = (float*)malloc(n2 * sizeof(h_B[0]));
if (h_B == 0) {
fprintf (stderr, "!!!! host memory allocation error (B)\n");
return EXIT_FAILURE;
}
h_C = (float*)malloc(n2 * sizeof(h_C[0]));
if (h_C == 0) {
fprintf (stderr, "!!!! host memory allocation error ©\n");
return EXIT_FAILURE;
}
/* Fill the matrices with test data */
for (i = 0; i < n2; i++) {
h_A[i] = 1.0 ;
h_B[i] = 1.0 ;
h_C[i] = 0.0 ;
}
/* Allocate device memory for the matrices */
status = cublasAlloc(n2, sizeof(d_A[0]), (void**)&d_A);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error (A)\n");
return EXIT_FAILURE;
}
status = cublasAlloc(n2, sizeof(d_B[0]), (void**)&d_B);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error (B)\n");
return EXIT_FAILURE;
}
status = cublasAlloc(n2, sizeof(d_C[0]), (void**)&d_C);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device memory allocation error ©\n");
return EXIT_FAILURE;
}
/* Initialize the device matrices with the host matrices */
status = cublasSetVector(n2, sizeof(h_A[0]), h_A, 1, d_A, 1);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device access error (write A)\n");
return EXIT_FAILURE;
}
status = cublasSetVector(n2, sizeof(h_B[0]), h_B, 1, d_B, 1);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device access error (write B)\n");
return EXIT_FAILURE;
}
status = cublasSetVector(n2, sizeof(h_C[0]), h_C, 1, d_C, 1);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device access error (write C)\n");
return EXIT_FAILURE;
}
/* Clear last error */
cublasGetError();
/* Performs operation using cublas */
time_ffsum_start = clock(); // begin timing ffsum loop
cublasSgemm('n', 'n', size, size, size, alpha, d_A, size, d_B, size, beta, d_C, size);
time_ffsum_end = clock(); // end timing ffsum loop
status = cublasGetError();
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! kernel execution error.\n");
return EXIT_FAILURE;
}
/* Allocate host memory for reading back the result from device memory */
h_C = (float*)malloc(n2 * sizeof(h_C[0]));
if (h_C == 0) {
fprintf (stderr, "!!!! host memory allocation error ©\n");
return EXIT_FAILURE;
}
/* Read the result back */
status = cublasGetVector(n2, sizeof(h_C[0]), d_C, 1, h_C, 1);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! device access error (read C)\n");
return EXIT_FAILURE;
}
/*Printing the result*/
looptime_ffsum = (double)(time_ffsum_end - time_ffsum_start);
printf("\t %f \n", looptime_ffsum );
/* for (i = 0; i < n2; i++) {
printf("%f \t", *(h_C + i));
}*/
/* Memory clean up */
free(h_A);
free(h_B);
free(h_C);
status = cublasFree(d_A);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! memory free error (A)\n");
return EXIT_FAILURE;
}
status = cublasFree(d_B);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! memory free error (B)\n");
return EXIT_FAILURE;
}
status = cublasFree(d_C);
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! memory free error ©\n");
return EXIT_FAILURE;
}
/* Shutdown */
status = cublasShutdown();
if (status != CUBLAS_STATUS_SUCCESS) {
fprintf (stderr, "!!!! shutdown error (A)\n");
return EXIT_FAILURE;
}
if (argc > 1) {
if (!strcmp(argv[1], "-noprompt") ||
!strcmp(argv[1], "-qatest") )
{
return EXIT_SUCCESS;
}
}
else
{
printf("\nPress ENTER to exit...\n");
getchar();
}
return EXIT_SUCCESS;
}