__nanosleep not working as expected

My guess is that nanosleep may have an undocumented upper bound on the argument.

This seems to work for me:

$ cat t2004.cu
#include <stdio.h>

#define RUNTIME_API_CALL(apiFuncCall)                                          \
  do {                                                                         \
    cudaError_t _status = apiFuncCall;                                         \
    if (_status != cudaSuccess) {                                              \
      fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n",     \
              __FILE__, __LINE__, #apiFuncCall, cudaGetErrorString(_status));  \
      exit(-1);                                                                \
    }                                                                          \
  } while (0)

__global__ void kernel() {
#if __CUDA_ARCH__ >= 700
  for (int i = 0; i < 1000; i++)
    __nanosleep(1000000U); // ls
#else
  printf(">>> __CUDA_ARCH__ !\n");
#endif
}

int main() {
  cudaEvent_t start, stop;
  RUNTIME_API_CALL(cudaEventCreate(&start));
  RUNTIME_API_CALL(cudaEventCreate(&stop));

  RUNTIME_API_CALL(cudaEventRecord(start));
  kernel<<<1, 1>>>();
  RUNTIME_API_CALL(cudaEventRecord(stop));
  RUNTIME_API_CALL(cudaEventSynchronize(stop));

  float duration;
  RUNTIME_API_CALL(cudaEventElapsedTime(&duration, start, stop));
  printf("Elapsed time: %fms\n", duration);

  return 0;
}
$ nvcc -arch=sm_70 -o t2004 t2004.cu
$ ./t2004
Elapsed time: 1048.487671ms
$

In the above example, if I pass arguments that are 1000000 or less to nanosleep, I get approximately expected timing. If I pass arguments that are 10000000 or greater, I don’t. So I guess there is a threshold of some sort between 1000000 and 10000000.

You might wish to file a bug. It’s possible this is a documentation issue, or there may be some other issue I am not aware of.

It seems probable that the only actual guarantee is that the actual sleep duration will be in the range [0, 2*t] where t is the argument. Given that, I couldn’t categorically state that any guarantees are violated, but the function behavior is curious around that threshold and I can’t explain it.