Hi,
Is there a way to compile a kernel to PTX without nvcc mangling the function name (adding the '_Z9' prefix and the 'v' suffix)?
I name the kernel: my_kernel
when compiling, nvcc renames the kernel to: _Z9my_kernelv
Every time I recompile the kernel, the symbol name may change, and then cuModuleGetFunction() will fail. I have been looking everywhere but can't find anything on the subject. I wonder how everyone else handles this when using the driver API. Thanks in advance.
master waver # cat k.cu
// k.cu -- minimal kernel used to show how nvcc names the entry point in PTX.
__constant__ char *ptr;         // pointer variable placed in constant memory
// Kernel with no parameters: stores 1 through ptr.
__global__ void my_kernel() {
    ptr[0] = 1;                 // write the byte value 1 at the address held in ptr
}
master waver # nvcc --device-compilation C -arch=sm_13 --ptx k.cu
master waver # cat k.ptx
.version 1.4
.target sm_13
// compiled with /usr/local/cuda/open64/lib//be
// nvopencc 3.0 built on 2009-10-26
//-----------------------------------------------------------
// Compiling /tmp/tmpxft_0000371b_00000000-7_k.cpp3.i (/tmp/ccBI#.obip92)
//-----------------------------------------------------------
//-----------------------------------------------------------
// Options:
//-----------------------------------------------------------
// Target:ptx, ISA:sm_13, Endian:little, Pointer Size:64
// -O3 (Optimization level)
// -g0 (Debug level)
// -m2 (Report advisories)
//-----------------------------------------------------------
.file 1 "<command-line>"
.file 2 "/tmp/tmpxft_0000371b_00000000-6_k.cudafe2.gpu"
.file 3 "/usr/lib/gcc/x86_64-pc-linux-gnu/4.3.2/include/stddef.h"
.file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h"
.file 5 "/usr/local/cuda/bin/../include/host_defines.h"
.file 6 "/usr/local/cuda/bin/../include/builtin_types.h"
.file 7 "/usr/local/cuda/bin/../include/device_types.h"
.file 8 "/usr/local/cuda/bin/../include/driver_types.h"
.file 9 "/usr/local/cuda/bin/../include/surface_types.h"
.file 10 "/usr/local/cuda/bin/../include/texture_types.h"
.file 11 "/usr/local/cuda/bin/../include/vector_types.h"
.file 12 "/usr/local/cuda/bin/../include/device_launch_parameters.h"
.file 13 "/usr/local/cuda/bin/../include/crt/storage_class.h"
.file 14 "/usr/include/bits/types.h"
.file 15 "/usr/include/time.h"
.file 16 "k.cu"
.file 17 "/usr/local/cuda/bin/../include/common_functions.h"
.file 18 "/usr/local/cuda/bin/../include/crt/func_macro.h"
.file 19 "/usr/local/cuda/bin/../include/math_functions.h"
.file 20 "/usr/local/cuda/bin/../include/device_functions.h"
.file 21 "/usr/local/cuda/bin/../include/math_constants.h"
.file 22 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"
.file 23 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"
.file 24 "/usr/local/cuda/bin/../include/sm_13_double_functions.h"
.file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"
.file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"
.file 27 "/usr/local/cuda/bin/../include/surface_functions.h"
.file 28 "/usr/local/cuda/bin/../include/texture_fetch_functions.h"
.file 29 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx3.h"
// `ptr`: a 64-bit variable in the .const state space; the kernel below loads
// it and uses the loaded value as a store address.
.const .u64 ptr;
// Entry point generated for `my_kernel` from k.cu. The emitted name is
// `_Z9my_kernelv`, consistent with C++ name mangling (`_Z` + name length 9 +
// `my_kernel` + `v` for an empty parameter list); this is the string that has
// to be passed to cuModuleGetFunction() for this module.
.entry _Z9my_kernelv
{
.reg .u16 %rh<3>;               // 16-bit registers (%rh1 is used below)
.reg .u64 %rd<3>;               // 64-bit registers (%rd1 is used below)
// .loc lines are source-location debug directives; file index 16 matches the
// `.file 16 "k.cu"` entry in the file table.
.loc 16 1896 0
$LBB1__Z9my_kernelv:
.loc 16 5 0
mov.s16 %rh1, 1;                // %rh1 = 1, the value to be stored
ld.const.u64 %rd1, [ptr];       // %rd1 = current value of `ptr`, read from .const space
st.global.s8 [%rd1+0], %rh1;    // 8-bit store of %rh1 to global address %rd1+0 (ptr[0] = 1)
.loc 16 7 0
exit;                           // end of the kernel
$LDWend__Z9my_kernelv:
} // _Z9my_kernelv
master waver #