I am trying to adapt the example from the bottom of the page from the following topic https://devtalk.nvidia.com/default/topic/1024278/can-a-cuda-kernel-call-cublas-function-or-how-to-call-a-cublas-functions-from-python-/ to CUDA 10 where you cannot call cublas from the device. The cuda code should implement matrix multiplication and with ctypes the python code should be able to pass the input to the cuda code. My adapted cuda code is
#include <cublas_v2.h>
typedef float ftype;
extern "C"
void run(ftype *i1, ftype *i2, ftype *o1, int d);
void run(ftype *i1, ftype *i2, ftype *o1, int d){
ftype *d_i1, *d_i2, *d_o1;
int ds = d*d*sizeof(ftype);
cudaMalloc(&d_i1, ds);
cudaMalloc(&d_i2, ds);
cudaMalloc(&d_o1, ds);
cudaMemcpy(d_i1, i1, ds, cudaMemcpyHostToDevice);
cudaMemcpy(d_i2, i2, ds, cudaMemcpyHostToDevice);
cublasHandle_t h;
cublasCreate(&h);
ftype alpha = 1.0;
ftype beta = 0.0;
cublasSgemm(h, CUBLAS_OP_N, CUBLAS_OP_N, d, d, d, &alpha, i1, d, i2, d, &beta, o1, d);
cudaMemcpy(o1, d_o1, ds, cudaMemcpyDeviceToHost);
cudaFree(d_i1);
cudaFree(d_i2);
cudaFree(d_o1);
}
and is a file called cublas_test.cu.
I compiled the code with
nvcc cublas_test.cu -o myapp.so -shared -Xcompiler -fPIC -lcublas
and the python code that calls it is
import numpy
import ctypes
dim = 4
N = dim * dim
# initialize arrays
i1 = numpy.ones((N), dtype=numpy.float32)
i2 = numpy.ones((N), dtype=numpy.float32)
o1 = numpy.zeros((N), dtype=numpy.float32)
# import DLL
E = ctypes.cdll.LoadLibrary("./myapp.so")
# run test
E.run( ctypes.c_void_p(i1.ctypes.data),
ctypes.c_void_p(i2.ctypes.data),
ctypes.c_void_p(o1.ctypes.data),
ctypes.c_int(dim))
# print output array
for i in range(0, len(o1)):
print o1[i]
in a file called cublas_test.py
I run the python code with
cuda-memcheck python cublas_test.py
and I get the following errors
========= CUDA-MEMCHECK
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (3,3,0) in block (0,0,0)
========= Address 0x017b297c is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (2,3,0) in block (0,0,0)
========= Address 0x017b2978 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (1,3,0) in block (0,0,0)
========= Address 0x017b2974 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (0,3,0) in block (0,0,0)
========= Address 0x017b2970 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (3,2,0) in block (0,0,0)
========= Address 0x017b296c is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (2,2,0) in block (0,0,0)
========= Address 0x017b2968 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (1,2,0) in block (0,0,0)
========= Address 0x017b2964 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (0,2,0) in block (0,0,0)
========= Address 0x017b2960 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (3,1,0) in block (0,0,0)
========= Address 0x017b295c is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (2,1,0) in block (0,0,0)
========= Address 0x017b2958 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (1,1,0) in block (0,0,0)
========= Address 0x017b2954 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (0,1,0) in block (0,0,0)
========= Address 0x017b2950 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (3,0,0) in block (0,0,0)
========= Address 0x017b294c is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (2,0,0) in block (0,0,0)
========= Address 0x017b2948 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (1,0,0) in block (0,0,0)
========= Address 0x017b2944 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Invalid __global__ read of size 4
========= at 0x00000118 in void gemm_kernel1x1_core<float, bool=0, bool=0, bool=0, bool=0, bool=0>(float*, float const *, float const *, int, int, int, int, int, int, float*, float*, float, float, int)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x017b2940 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x21fb31]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0x23a343]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xea212]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xebe92]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xec603]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcublas.so.7.5 [0xecafa]
========= Host Frame:./myapp.so (run + 0x119) [0x51f5]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x351c13]
========= Host Frame:./myapp.so [0x39e4f]
========= Host Frame:./myapp.so (run + 0x138) [0x5214]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaFree.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x351c13]
========= Host Frame:./myapp.so [0x41996]
========= Host Frame:./myapp.so (run + 0x144) [0x5220]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaFree.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x351c13]
========= Host Frame:./myapp.so [0x41996]
========= Host Frame:./myapp.so (run + 0x150) [0x522c]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaFree.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x351c13]
========= Host Frame:./myapp.so [0x41996]
========= Host Frame:./myapp.so (run + 0x15c) [0x5238]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call_unix64 + 0x4c) [0x5e40]
========= Host Frame:/usr/lib/x86_64-linux-gnu/libffi.so.6 (ffi_call + 0x2eb) [0x58ab]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so (_ctypes_callproc + 0x48f) [0xd3df]
========= Host Frame:/usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so [0x11d82]
========= Host Frame:python (PyEval_EvalFrameEx + 0x578d) [0xc166d]
========= Host Frame:python (PyEval_EvalCodeEx + 0x306) [0xb9b66]
========= Host Frame:python [0xeb69f]
========= Host Frame:python (PyRun_FileExFlags + 0x82) [0xe58f2]
========= Host Frame:python (PyRun_SimpleFileExFlags + 0x186) [0xe41a6]
========= Host Frame:python (Py_Main + 0x54e) [0x938ce]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xf0) [0x20830]
========= Host Frame:python (_start + 0x29) [0x93299]
=========
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
========= ERROR SUMMARY: 20 errors
I have a feeling there is something wrong with the cuda part but I am not sure what. Help would be greatly appreciated.