Vista 64, SDK 2Beta2 32 bit,
I am trying to free up the CPU for computation, but when I call cudaMemcpyAsync it returns the error “invalid argument”. I tried comparing my code with the asyncAPI sample, but could not find the difference.
Here is how I allocate page-locked memory:
// Allocate one md5_data-sized page-locked host buffer for gpu[i]; checked like the
// other runtime calls so an allocation failure is reported here, not at the later copy.
// NOTE(review): on this SDK generation pinned memory is presumably tied to the host
// thread/context that allocated it - confirm this runs in the same thread that later
// calls cudaMemcpyAsync on common_h.
CUDA_SAFE_CALL( cudaMallocHost((void**)&gpu[i].common_h, sizeof(md5_data)) );
I’ve double-checked that the pointer is not changed anywhere after allocation.
Any ideas what may cause the problem?
Also, the asyncAPI sample throws a lot of first-chance exceptions - is that normal?
// Upload the input, launch the brute-force kernel, queue an asynchronous download of
// the result, and poll an event so the CPU can sleep instead of blocking in the copy.
// After each step the last CUDA error string and a timing delta are printed.

// Event that marks the end of the work queued on stream 0 below.
cudaEvent_t stop;
CUDA_SAFE_CALL( cudaEventCreate(&stop) );
printf("%s\n", cudaGetErrorString(cudaGetLastError()));

// Drain any previously queued work so the timings below start from an idle device.
CUDA_SAFE_CALL( cudaThreadSynchronize() );
// cudaGetLastError() also clears the error, so one print per step is enough
// (the original printed this line twice; the second print could only say "no error").
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
printf("1: %f\n", getTimeDelta(tmp));

// Blocking host-to-device upload of the input data.
cudaMemcpy(data_d, gpu[device_id].data_h, sizeof(int)*4*thread_n*grid_n*gpu[device_id].keys_per_thread, cudaMemcpyHostToDevice);
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
printf("2: %f\n", getTimeDelta(tmp));

// Kernel launches return no status; launch errors are picked up by cudaGetLastError().
md5_gpu_bruteforce_thread<<<grid, threads>>>(data_d, common_d, perm::pwd_len, perm::gpu_len, perm::charset_len, gpu[device_id].keys_per_thread);
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
printf("3: %f\n", getTimeDelta(tmp));

// Queue the device-to-host download of the result on stream 0.
// NOTE(review): cudaMemcpyAsync needs a page-locked host pointer; "invalid argument"
// here presumably means common_h is not seen as pinned by the calling thread's
// context - confirm cudaMallocHost ran in this same host thread.
cudaMemcpyAsync(gpu[device_id].common_h, common_d, sizeof(md5_data), cudaMemcpyDeviceToHost, NULL);
printf("%s\n", cudaGetErrorString(cudaGetLastError()));

// Record the event behind the copy (stream 0, same as the copy). Without this the
// event is never associated with any work and the loop below waits on nothing.
CUDA_SAFE_CALL( cudaEventRecord(stop, 0) );

// Poll instead of blocking so the CPU is free while the GPU works.
cudaError_t stop_status;
while( (stop_status = cudaEventQuery(stop)) == cudaErrorNotReady )
{
    Sleep(1);
}
// Any status other than success means the wait ended because of an error, not completion.
if( stop_status != cudaSuccess )
{
    printf("%s\n", cudaGetErrorString(stop_status));
}

// CUT_CHECK_ERROR expects a message string, not a statement; wrapping the destroy call
// in it can drop the call entirely, so destroy the event directly.
CUDA_SAFE_CALL( cudaEventDestroy(stop) );