I have a large function that I’ve reduced to the representative minimum in the sample below, that the compiler seems to think does no work and is removed.
Is there anything I can do to prevent this occuring?
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
#define LUT(a,b,c,d,e) asm("lop3.b32 %0, %1, %2, %3, "#e";" : "=r"(a): "r"(b), "r"(c), "r"(d));
__device__ void sb(const uint32_t x0, const uint32_t x1, const uint32_t x2, uint32_t y0){
LUT(y0, x0, x1, x2, 0x1F);
}
__global__ void test(){
uint32_t in0 = 0;
uint32_t in1 = 1;
uint32_t in2 = 2;
uint32_t out0;
sb(in0, in1, in2, out0);
printf("0x%08X\n", out0);
}
int main(){
test<<<1, 1>>>();
cudaDeviceSynchronize();
return 0;
}
Output of compiler:
make all
Building file: ../sbox.cu
Invoking: NVCC Compiler
/usr/local/cuda/bin/nvcc --ptxas-options=-v -O3 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_61,code=compute_61 -ccbin g++ -c -o "sbox.o" "../sbox.cu"
../sbox.cu(11): warning: parameter "y0" was set but never used
../sbox.cu(23): warning: variable "out0" is used before its value is set
ptxas info : 8 bytes gmem
ptxas info : Compiling entry function '_Z4testv' for 'sm_61'
ptxas info : Function properties for _Z4testv
8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info : Used 8 registers, 320 bytes cmem[0]
../sbox.cu(23): warning: variable "out0" is used before its value is set
Finished building: ../sbox.cu
Building target: sbox
Invoking: NVCC linker
/usr/local/cuda/bin/nvcc --cudart=static -ccbin g++ -gencode arch=compute_61,code=sm_61 -gencode arch=compute_61,code=compute_61 -o "sbox" ./sbox.o
Finished building target: sbox
14:51:40 Build Finished. 0 errors, 0 warnings. (took 1s.74ms)