Three arrays A of 1023 elements each and three arrays B of 1024 elements each are allocated as shown below:
// Micro-benchmark: time three cudaMalloc calls of 1023 ints each, then
// three cudaMalloc calls of 1024 ints each, using one cutil timer.
int *a1,*a2,*a3;   // receive the three 1023-int allocations
int *b1,*b2,*b3;   // receive the three 1024-int allocations
int sizeA = 1023 * sizeof(int);
int sizeB = 1024 * sizeof(int);

// Measurement 1: the 1023-int allocations. Each call gets its own pointer
// so no allocation is overwritten (and thereby leaked) by the next one.
cutStartTimer(timer);
CUDA_SAFE_CALL(cudaMalloc( (void**) &a1, sizeA));
CUDA_SAFE_CALL(cudaMalloc( (void**) &a2, sizeA));
CUDA_SAFE_CALL(cudaMalloc( (void**) &a3, sizeA));
cutStopTimer(timer);
printf("alloc time = %f", cutGetTimerValue(timer)); // originally reported: alloc time = 0.016ms

// Measurement 2: the 1024-int allocations.
// NOTE(review): cutil timers accumulate elapsed time over successive
// start/stop pairs, so the timer is reset here to keep the first
// measurement out of the second reading - confirm against your cutil version.
cutResetTimer(timer);
cutStartTimer(timer);
CUDA_SAFE_CALL(cudaMalloc( (void**) &b1, sizeB));
CUDA_SAFE_CALL(cudaMalloc( (void**) &b2, sizeB));
CUDA_SAFE_CALL(cudaMalloc( (void**) &b3, sizeB));
cutStopTimer(timer);
printf("alloc time = %f", cutGetTimerValue(timer)); // originally reported: alloc time = 0.825ms
Can anyone explain what causes the difference in allocation time between the 1023-element arrays and the 1024-element arrays?