After cudaMalloc, host stalls

Hey, everybody,

I’m running the host code below:

[codebox]#include "cutil.h"

#include "sys/timeb.h"

#include <stdio.h>

#include <stdlib.h>

#include "sorCUDAKernel.cu"

/*
 * Wall-clock timestamp helper.
 *
 * Returns the time reported by ftime() as a double: the whole-second field
 * plus the millisecond field converted to a fraction of a second.
 */
double read_timer() {
    struct timeb now;
    double wholeSeconds;
    double fraction;

    ftime(&now);

    wholeSeconds = (double)now.time;
    fraction = (double)now.millitm / 1000.0;

    return wholeSeconds + fraction;
}

/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * status: return code of the call being checked.
 * what:   short label identifying the call in the diagnostic.
 * Returns 1 when status is not cudaSuccess, 0 otherwise.
 */
static int cudaFailed(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return 0;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Fills a size x size host matrix with small random values, copies it to the
 * device, launches sorCUDAKernel on it, copies the matrix back into `result`
 * and prints the wall-clock time covering the launch and the copy back.
 *
 * Every CUDA runtime call is checked so that a failure is reported instead of
 * being silently ignored, and all buffers are released on every exit path.
 *
 * Returns 0 on success, 1 if a host allocation or any CUDA call fails.
 */
int main() {
    CUT_DEVICE_INIT();

    /* All locals are declared before the first `goto cleanup` so that no jump
       crosses an initialization (nvcc compiles this file as C++). */
    double start, stop;
    int size = 10;
    int i, j;
    int exitCode = 1;
    size_t bytes = sizeof(float) * size * size;
    float *d_randMatrix = NULL, *randMatrix = NULL, *result = NULL;
    dim3 grid(200, 1, 1);
    dim3 thread(256, 1, 1);

    srand(345);

    randMatrix = (float *)malloc(bytes);
    result = (float *)malloc(bytes);
    if (randMatrix == NULL || result == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n", (unsigned long)bytes);
        goto cleanup;
    }

    for (i = 0; i < size; i++) {
        for (j = 0; j < size; j++) {
            randMatrix[i * size + j] = (rand() / (float)RAND_MAX) * 1E-6;
            result[i * size + j] = 0.0f;
        }
    }

    printf("before memory allocation\n");
    if (cudaFailed(cudaMalloc((void **)&d_randMatrix, bytes), "cudaMalloc")) {
        goto cleanup;
    }
    printf("device memory allocated\n");

    if (cudaFailed(cudaMemcpy(d_randMatrix, randMatrix, bytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy (host to device)")) {
        goto cleanup;
    }
    printf("Device memory copied to\n");

    start = read_timer();
    printf("Entering kernel\n");

    /* Arguments are passed through unchanged; their meaning is defined by
       sorCUDAKernel, which is not visible here.
       NOTE(review): 200 x 256 threads are launched for a size*size = 100
       element matrix -- confirm the kernel bounds-checks its indices. */
    sorCUDAKernel<<<grid, thread>>>(1.25, d_randMatrix, 100, size);

    /* A launch returns no status itself; a bad launch configuration only
       shows up through cudaGetLastError(). */
    if (cudaFailed(cudaGetLastError(), "sorCUDAKernel launch")) {
        goto cleanup;
    }

    /* Blocking copy: it waits for the kernel to finish, so any fault raised
       while the kernel ran is reported by this call. */
    if (cudaFailed(cudaMemcpy(result, d_randMatrix, bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy (device to host)")) {
        goto cleanup;
    }

    stop = read_timer();
    printf("%f\n", stop - start);

    exitCode = 0;

cleanup:
    /* cudaFree(NULL) and free(NULL) are no-ops, so this is safe on every path. */
    cudaFree(d_randMatrix);
    free(result);
    free(randMatrix);

    return exitCode;
}

[/codebox]

However, the program stalls once it hits cudaMalloc(): it doesn't exit, but it doesn't make any progress either. I've never encountered this before; has anyone else? Any suggestions for a fix?

Thanks in advance.