nvcc changes symbol names when compiling to ptx

Hi,

Is there a way to compile the kernel into PTX without nvcc adding the ‘_Z9’ prefix to function names?

I name the kernel: my_kernel

when compiling, nvcc renames the kernel to: _Z9my_kernelv

Every time I recompile the kernel, the symbol name may change and cuModuleGetFunction() will fail. I was looking everywhere and can’t find anything on the subject. I wonder how everyone else does this when they use the driver API. Thanks in advance.

master waver # cat k.cu

__constant__ char *ptr;

__global__ void my_kernel() {

ptr[0]=1;

}

master waver # nvcc --device-compilation C -arch=sm_13 --ptx k.cu

master waver # cat k.ptx

    .version 1.4

    .target sm_13

    // compiled with /usr/local/cuda/open64/lib//be

    // nvopencc 3.0 built on 2009-10-26

//-----------------------------------------------------------

    // Compiling /tmp/tmpxft_0000371b_00000000-7_k.cpp3.i (/tmp/ccBI#.obip92)

    //-----------------------------------------------------------

//-----------------------------------------------------------

    // Options:

    //-----------------------------------------------------------

    //  Target:ptx, ISA:sm_13, Endian:little, Pointer Size:64

    //  -O3 (Optimization level)

    //  -g0 (Debug level)

    //  -m2 (Report advisories)

    //-----------------------------------------------------------

    .file   1       "<command-line>"

    .file   2       "/tmp/tmpxft_0000371b_00000000-6_k.cudafe2.gpu"

    .file   3       "/usr/lib/gcc/x86_64-pc-linux-gnu/4.3.2/include/stddef.h"

    .file   4       "/usr/local/cuda/bin/../include/crt/device_runtime.h"

    .file   5       "/usr/local/cuda/bin/../include/host_defines.h"

    .file   6       "/usr/local/cuda/bin/../include/builtin_types.h"

    .file   7       "/usr/local/cuda/bin/../include/device_types.h"

    .file   8       "/usr/local/cuda/bin/../include/driver_types.h"

    .file   9       "/usr/local/cuda/bin/../include/surface_types.h"

    .file   10      "/usr/local/cuda/bin/../include/texture_types.h"

    .file   11      "/usr/local/cuda/bin/../include/vector_types.h"

    .file   12      "/usr/local/cuda/bin/../include/device_launch_parameters.h"

    .file   13      "/usr/local/cuda/bin/../include/crt/storage_class.h"

    .file   14      "/usr/include/bits/types.h"

    .file   15      "/usr/include/time.h"

    .file   16      "k.cu"

    .file   17      "/usr/local/cuda/bin/../include/common_functions.h"

    .file   18      "/usr/local/cuda/bin/../include/crt/func_macro.h"

    .file   19      "/usr/local/cuda/bin/../include/math_functions.h"

    .file   20      "/usr/local/cuda/bin/../include/device_functions.h"

    .file   21      "/usr/local/cuda/bin/../include/math_constants.h"

    .file   22      "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h"

    .file   23      "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h"

    .file   24      "/usr/local/cuda/bin/../include/sm_13_double_functions.h"

    .file   25      "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h"

    .file   26      "/usr/local/cuda/bin/../include/sm_20_intrinsics.h"

    .file   27      "/usr/local/cuda/bin/../include/surface_functions.h"

    .file   28      "/usr/local/cuda/bin/../include/texture_fetch_functions.h"

    .file   29      "/usr/local/cuda/bin/../include/math_functions_dbl_ptx3.h"

.const .u64 ptr;

.entry _Z9my_kernelv

    {

    .reg .u16 %rh<3>;

    .reg .u64 %rd<3>;

    .loc    16      1896    0

$LBB1__Z9my_kernelv:

    .loc    16      5       0

    mov.s16         %rh1, 1;

    ld.const.u64    %rd1, [ptr];

    st.global.s8    [%rd1+0], %rh1;

    .loc    16      7       0

    exit;

$LDWend__Z9my_kernelv:

    } // _Z9my_kernelv

master waver #

That is C++ function name mangling, and it is completely normal for C++. If you declare your kernels using extern "C", the compiler should not do any mangling, and the “original” name of your function should work with the kernel launch functions of either API.

I can’t believe it, I had this problem before. I checked it before posting and it didn’t work, so I thought PTX was generated differently. Now I found out that when checking I had put extern "C" after the __global__ declaration, like this:

__global__ extern "C" void my_kernel()

and got

k.cu(3): error: linkage specification is not allowed

now i put it before, and it works

extern "C" __global__ void my_kernel()

thanks again