This isn’t really a question concerning CUDA, but rather a memory management issue I ran into while programming CUDA. I’m a novice in C programming and this is as far as I can see an error originating from faulty memory management. I’m used to java programming so this is quite new to me.
This is a function in my CUDA program and the idea is to run it several times in a for loop. It runs like it should the first time, but the second time it hangs on the line indicated by the comment in the code.
[codebox]int* calculateNewArray(int *oldMatrix, int oldSize, int oldRowLength, int newSize, int newRowLength, int newNumOfRows, int extraRows){
printf("Test3\n");
int *newMatrix_h, *newMatrix_d, *oldMatrix_d, *newMatrixReturned;
size_t newMSize = newSize*sizeof(int);
size_t oldMSize = oldSize*sizeof(int);
printf("Test4\n");
//Allocating space on the host for the result of the addition step
newMatrix_h = (int *)malloc(newMSize); //<--------------------------The program hangs on this line the second time the function is run
printf("Test5\n");
newMatrixReturned = (int *)malloc(newMSize);
//Allocating space on the device for the old and the new Matrix
cudaMalloc((void **) &newMatrix_d, newMSize);
cudaMalloc((void **) &oldMatrix_d, oldMSize);
//Initializing array on host
for(int i=0; i<newSize; i++){
newMatrix_h[i]=(int)0;
}
//Copying the old and the new Matrix to the device
cudaMemcpy(newMatrix_d, newMatrix_h, sizeof(int)*oldMSize, cudaMemcpyHostToDevice);
cudaMemcpy(oldMatrix_d, oldMatrix, sizeof(int)*oldMSize, cudaMemcpyHostToDevice);
//Computing execution configuration
int blockSize = 100;
int nBlocks = 1;
//Running the kernel which adds together the rows of the old matrix
addMatrix <<< nBlocks, blockSize >>> (oldMatrix_d, oldRowLength, newMatrix_d, newSize, newRowLength, newNumOfRows, extraRows);
printf("Test6\n");
//Copying the result back to the host
cudaMemcpy(newMatrixReturned, newMatrix_d, sizeof(int)*newMSize, cudaMemcpyDeviceToHost);
printf("Test7\n");
return newMatrixReturned;
}[/codebox]
What should I do to make this function run several times without failing?