CURAND_STATUS_LAUNCH_FAILURE + CUDA 7.5

My code is as follows:

#include <cuda_runtime.h>
    #include <cuda.h>
    #include <curand.h>
    #include <stdio.h>
    
    // Check the status returned by a cuRAND / CUDA runtime call, tagging any
    // failure with the call site's file and line. The do { } while (0) wrapper
    // makes each macro expand to exactly one statement, so `macro(...);` is
    // safe inside an unbraced if/else (a bare { } block followed by `;` is not).
    #define gpuErrorCheckCurand(ans) do { gpuAssertCurand((ans), __FILE__, __LINE__); } while (0)
    #define gpuErrorCheckCuda(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
    
    // Translates a cuRAND status code into the name of its enumerator.
    // Any value without a matching case yields "Unknown cuRAND error".
    const char* curandGetErrorString(curandStatus_t status)
    {
        // Fallback used when no case below matches.
        const char* name = "Unknown cuRAND error";

        switch (status)
        {
        case CURAND_STATUS_SUCCESS:
            name = "CURAND_STATUS_SUCCESS";
            break;
        case CURAND_STATUS_VERSION_MISMATCH:
            name = "CURAND_STATUS_VERSION_MISMATCH";
            break;
        case CURAND_STATUS_NOT_INITIALIZED:
            name = "CURAND_STATUS_NOT_INITIALIZED";
            break;
        case CURAND_STATUS_ALLOCATION_FAILED:
            name = "CURAND_STATUS_ALLOCATION_FAILED";
            break;
        case CURAND_STATUS_TYPE_ERROR:
            name = "CURAND_STATUS_TYPE_ERROR";
            break;
        case CURAND_STATUS_OUT_OF_RANGE:
            name = "CURAND_STATUS_OUT_OF_RANGE";
            break;
        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
            name = "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
            break;
        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
            name = "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
            break;
        case CURAND_STATUS_LAUNCH_FAILURE:
            name = "CURAND_STATUS_LAUNCH_FAILURE";
            break;
        case CURAND_STATUS_PREEXISTING_FAILURE:
            name = "CURAND_STATUS_PREEXISTING_FAILURE";
            break;
        case CURAND_STATUS_INITIALIZATION_FAILED:
            name = "CURAND_STATUS_INITIALIZATION_FAILED";
            break;
        case CURAND_STATUS_ARCH_MISMATCH:
            name = "CURAND_STATUS_ARCH_MISMATCH";
            break;
        case CURAND_STATUS_INTERNAL_ERROR:
            name = "CURAND_STATUS_INTERNAL_ERROR";
            break;
        }

        return name;
    }
    
    // Reports a failed CUDA runtime call.
    //   code  - status returned by the call being checked
    //   file  - source file of the call site (printed in the message)
    //   line  - source line of the call site (printed in the message)
    //   abort - when true, terminate the process with `code` as exit status
    // Does nothing when `code` is cudaSuccess.
    inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=false) {
        if (code == cudaSuccess) {
            return;
        }

        printf("GPU assert: %s %s %d\n", cudaGetErrorString(code), file, line);

        if (abort) {
            exit(code);
        }
    }
    
    // Reports a failed cuRAND call.
    //   code  - status returned by the call being checked
    //   file  - source file of the call site (printed in the message)
    //   line  - source line of the call site (printed in the message)
    //   abort - when true, terminate the process with `code` as exit status
    // Does nothing when `code` is CURAND_STATUS_SUCCESS.
    inline void gpuAssertCurand(curandStatus_t code, const char *file, int line, bool abort=false) {
        if (code == CURAND_STATUS_SUCCESS) {
            return;
        }

        printf("GPU (CURAND) assert: %s %s %d\n", curandGetErrorString(code), file, line);

        if (abort) {
            exit(code);
        }
    }
    
    
    // Creates a default pseudo-random cuRAND generator seeded with 1234, fills a
    // device buffer of n floats with uniform samples, then releases the
    // generator and the buffer. Every API call is routed through the
    // gpuErrorCheck* macros, which print a message on failure.
    int main() {
        const int n = 100;
        curandGenerator_t generator;
        // The checks below do not abort by default, so if cudaMalloc fails the
        // later calls still run. Starting from NULL keeps the pointer
        // well-defined in that case instead of leaving it indeterminate.
        float* uniformDevice = NULL;

        gpuErrorCheckCurand(curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_DEFAULT));
        gpuErrorCheckCurand(curandSetPseudoRandomGeneratorSeed(generator, 1234ULL));
        gpuErrorCheckCuda(cudaMalloc(&uniformDevice, n * sizeof(float)));
        gpuErrorCheckCurand(curandGenerateUniform(generator, uniformDevice, n));
        // Wait for the generation work to finish so that any error raised while
        // it executes is reported here rather than by an unrelated later call.
        gpuErrorCheckCuda(cudaDeviceSynchronize());
        gpuErrorCheckCurand(curandDestroyGenerator(generator));
        gpuErrorCheckCuda(cudaFree(uniformDevice));
        return 0;
    }

I compile the code above with the command:

nvcc rand.cu -lcurand

For the record, my graphics card is a GTX 1060 and I’m using CUDA 7.5 with driver version 375.39. The problem is that generating the random numbers fails with the error status CURAND_STATUS_LAUNCH_FAILURE. The output from cuda-memcheck is:

Program hit cudaErrorInvalidDeviceFunction (error 8) due to "invalid device function" on CUDA API call to cudaGetLastError.

Does anybody know what’s wrong?

See here:

https://devtalk.nvidia.com/default/topic/941354/all-curand-samples-fail-with-gtx-1080-on-cuda-7-5/

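Switch to CUDA 8 for Pascal (cc 6.x) GPUs. CUDA 7.5 does not support Pascal, so on a GTX 1060 the cuRAND kernels cannot be launched, which is what the "invalid device function" error reports.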