Hello All,
While registrating the time needed to allocate a certain amount of data we encountered the following problem. It appears that defining a constant changes the time needed to allocate data on the device. It is unclear whether the timer is manipulated or something else is happening. This is the code (we took parts of our original code just for testing):
// include standard libraries
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <ctime>
#include <unistd.h>
// CUDA includes
#include <cutil.h>
#include "constants.h"
int main(int argc, char* argv[]) {
#ifdef __DEVICE_EMULATION__
printf("EMULATION mode\n\n");
#else
printf("GPU mode\n\n");
#endif
// number of x-, y- and z-planes
int h_nx = atoi(argv[1]);
int h_ny = atoi(argv[2]);
printf("Number of planes\n");
printf("X planes = %i\n", h_nx);
printf("Y planes = %i\n", h_ny);
// total number of voxels.
int Ntotal = (h_nx-1)*(h_ny-1);
// allocate constants on device and fill them
CUDA_SAFE_CALL(cudaMemcpyToSymbol("nx", &h_nx, sizeof(h_nx)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol("ny", &h_ny, sizeof(h_ny)));
int mem_size = Ntotal*sizeof(float);
printf("Ntotal = %i\n", Ntotal);
unsigned int timerAlloc = 0;
CUT_SAFE_CALL(cutCreateTimer(&timerAlloc));
CUT_SAFE_CALL(cutStartTimer(timerAlloc));
// We only allocate, in this case we don't care which data our host array contains.
float *h_CT_dataset;
h_CT_dataset = (float*) malloc(Ntotal*sizeof(float));
float *d_CT_dataset;
CUDA_SAFE_CALL(cudaMalloc((void**) &d_CT_dataset, Ntotal*sizeof(float)));
CUDA_SAFE_CALL(cudaMemcpy(d_CT_dataset, h_CT_dataset, Ntotal*sizeof(float), cudaMemcpyHostToDevice));
float *h_radiological_path;
float *d_radiological_path;
h_radiological_path = (float*) malloc(mem_size);
CUDA_SAFE_CALL(cudaMalloc((void**) &d_radiological_path, mem_size));
// stop and destroy timer
CUT_SAFE_CALL(cutStopTimer(timerAlloc));
printf("Memory allocation for rho and RD: %f (ms) \n", cutGetTimerValue(timerAlloc));
CUT_SAFE_CALL(cutDeleteTimer(timerAlloc));
}
and in constants.h:
#ifndef CONSTANTS_H
#define CONSTANTS_H
__device__ __constant__ int nx;
__device__ __constant__ int ny;
#endif // CONSTANTS_H
Timing the allocation as coded above returns
GPU mode
Number of planes
X planes = 1025
Y planes = 1025
Ntotal = 1048576
Memory allocation for rho and RD: 2.790000 (ms)
commenting out
CUDA_SAFE_CALL(cudaMemcpyToSymbol("nx", &h_nx, sizeof(h_nx)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol("ny", &h_ny, sizeof(h_ny)));
however gives:
GPU mode
Number of planes
X planes = 1025
Y planes = 1025
Ntotal = 1048576
Memory allocation for rho and RD: 237.233002 (ms)
Any ideas ?
regards,
Tijn