Hello,
We have some code that uses NPP with non-default streams. It worked as expected with CUDA 6.5 and, based on our testing, it works with CUDA <= 7.5 and breaks with CUDA >= 8. We would like to use the newest version, CUDA 9.2. We’re using driver version 396.37 on CentOS 7.
In the following code sample, the program exits with SIGABRT and the message:
terminate called after throwing an instance of 'NppStatus'
Aborted
The backtrace shows the program aborts in nppiMinMaxIndxGetBufferHostSize_16u_C1MR. We’d like to understand if we’re abusing the CUDA API, or if this is a bug.
Makefile (modify CUDA_INSTALL_PATH and CUDA7_INSTALL_PATH):
# Builds the same reproduction program (main.cpp) twice, once per CUDA toolkit,
# so the two behaviours can be compared side by side.
# Edit the two install paths below to match the local machine.
CUDA_INSTALL_PATH = /data/zmarvel/cuda-9.2
CUDA7_INSTALL_PATH = /data/zmarvel/cuda-7.5

NVCC = $(CUDA_INSTALL_PATH)/bin/nvcc
NVCC7 = $(CUDA7_INSTALL_PATH)/bin/nvcc

# Libraries for CUDA 9.2
CUDA_LIB_NAMES = -lnppc -lnppist
# Libraries for CUDA 7.5
CUDA7_LIB_NAMES = -lnppc -lnppi

CUDA_INC_PATH = $(CUDA_INSTALL_PATH)/include
CUDA_LIB_PATH = $(CUDA_INSTALL_PATH)/lib64
CUDA7_INC_PATH = $(CUDA7_INSTALL_PATH)/include
CUDA7_LIB_PATH = $(CUDA7_INSTALL_PATH)/lib64

# Unoptimised debug build shared by both targets.
NVCCFLAGS := -g -O0 -Xcompiler -fPIC --gpu-architecture=compute_30 \
	--gpu-code=sm_30,compute_30 --ptxas-options=-v -Xcompiler -Wno-enum-compare

# 'all' and 'clean' name actions, not files; declaring them phony keeps a stray
# file with either name from masking the rule.
.PHONY: all clean

all: main7.5 main9.2

# Libraries are listed after the source file so that the linker resolves the
# program's references regardless of static/--as-needed link behaviour.
# Makefile is a prerequisite so that editing the paths above forces a rebuild.
main9.2: main.cpp Makefile
	$(NVCC) $(NVCCFLAGS) -I$(CUDA_INC_PATH) $< -o $@ \
		-L$(CUDA_LIB_PATH) $(CUDA_LIB_NAMES)

main7.5: main.cpp Makefile
	$(NVCC7) $(NVCCFLAGS) -I$(CUDA7_INC_PATH) $< -o $@ \
		-L$(CUDA7_LIB_PATH) $(CUDA7_LIB_NAMES)

clean:
	rm -f main main7.5 main9.2
Code sample (set LD_LIBRARY_PATH appropriately for main7.5 and main9.2):
#include <stdio.h>
#include <unistd.h>
#include "cuda.h"
#include "cuda_runtime.h"
#include "npp.h"
int main(int argc, char *argv[]) {
#define CUDA_CHECK(x) do { \
cudaError_t X = x; \
if (X != CUDA_SUCCESS) \
return -1; \
} while (0);
#define NPP_CHECK(x) do { \
NppStatus X = x; \
if (X != NPP_SUCCESS) \
return -1; \
} while (0);
int bufsz;
NppiSize sz = { 1024, 1024 };
cudaStream_t stream1, stream2;
CUDA_CHECK(cudaStreamCreate(&stream1));
CUDA_CHECK(cudaDeviceSynchronize());
nppSetStream(stream1);
CUDA_CHECK(cudaStreamDestroy(stream1));
// The program will not abort if the following line is removed.
CUDA_CHECK(cudaDeviceReset());
CUDA_CHECK(cudaStreamCreate(&stream2));
CUDA_CHECK(cudaDeviceSynchronize());
nppSetStream(stream2);
// Program aborts here
NPP_CHECK(nppiMinMaxIndxGetBufferHostSize_16u_C1MR(sz, &bufsz));
printf("bufsz: %d\n", bufsz);
CUDA_CHECK(cudaStreamDestroy(stream2));
return 0;
}
Thanks in advance,
Zack