Hello,
The cublasGemmEx() function with __dp4a can't be called from a kernel declared with the __global__ keyword. Linking fails with an undefined-symbol error, whereas cublasSgemm() can be called successfully from a kernel in the same file.
Is cublasGemmEx() supported with dynamic parallelism (i.e. callable from device code) or not?
Thanks
MAKEFILE:
/usr/local/cuda-8.0/bin/nvcc -I/usr/local/cuda-8.0/samples/common/inc/ -O3 -std=c++11 --cubin --relocatable-device-code=true -gencode arch=compute_60,code=sm_60 kernel.cu -link -L/usr/local/cuda-8.0/lib64 -lcudadevrt -lcublas -lcublas_device -dlink -o kernel.cubin
Linking error:
/usr/local/cuda-8.0/bin/nvcc -I/usr/local/cuda-8.0/samples/common/inc/ -O3 -std=c++11 --cubin --relocatable-device-code=true -gencode arch=compute_60,code=sm_60 binary_kernels.cu -link -L/usr/local/cuda-8.0/lib64 -lcudadevrt -lcublas -lcublas_device -dlink -o kernel.cubin
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
ptxas info : ‘device-function-maxrregcount’ is a BETA feature
nvlink error : Undefined reference to ‘cublasGemmEx’ in ‘/tmp/tmpxft_000027aa_00000000-15_binary_kernels.o’
makefile:2: recipe for target ‘all’ failed
make: *** [all] Error 255
File: kernel.cu
extern “C” {
global void invokeDeviceCublasSgemm(float* A, float* B, float* C, int m, int n, int k)
{
cublasHandle_t cnpHandle;
cublasStatus_t status = cublasCreate(&cnpHandle);
float alpha = 1.0;
float beta = 0.0;
/* Perform operation using cublas */
status =
cublasSgemm(cnpHandle,
CUBLAS_OP_N, CUBLAS_OP_N,
k, m, n,
&alpha,
B, k,
A, n,
&beta,
C, k);
cublasDestroy(cnpHandle);
}
}
extern “C” {
global void invokeDeviceCublasGemmEx(char* A, char* B, int* C, int m, int n, int k)
{
cublasHandle_t cnpHandle;
cublasStatus_t status = cublasCreate(&cnpHandle);
int alpha = 1;
int beta = 0;
/* Perform operation using cublas */
status = cublasGemmEx(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N,
k, m, n,
&alpha, B, CUDA_R_8I ,k,
A, CUDA_R_8I ,n,
&beta, C, CUDA_R_32I ,k, CUDA_R_32I,CUBLAS_GEMM_DFALT);// colE x rowD
cublasDestroy(cnpHandle);
}
}