HMM support in Linux driver

I have installed Jerome Glisse’s kernel with HMM support (https://cgit.freedesktop.org/~glisse/linux/?h=hmm-v23) (see also https://lkml.org/lkml/2017/5/24/731).

I’m using driver 381.22. Should I expect this driver to be able to use the HMM capabilities of the kernel?

float *gpuData = static_cast<float*>( malloc( sizeof(float)*size));
CUDA_CHECK_RETURN(cudaMemcpy(gpuData, data, sizeof(float)*size, cudaMemcpyHostToDevice));

Against my 1080Ti, the above code throws an error. I think if the driver were HMM-aware the above code would succeed. Is that correct?

And assuming the answer is “the 381.22 driver is not HMM-aware”, is there a HMM-aware driver I might use?

Thank you!

[Formerly misposted in Linux graphics forum]

After making corrections to the code snippet above I’m confident the answer is, the current driver 381.22 does not recognize HMM capability in the kernel.

I’d love to be a guinea pig for a beta driver that does support HMM.

For completeness here’s the test program. It succeeds if you set HMM = 0; it fails if HMM = 1.

#include <iostream>
#include <numeric>
#include <stdlib.h>

static void CheckCudaErrorAux (const char *, unsigned, const char *, cudaError_t);
#define CUDA_CHECK_RETURN(value) CheckCudaErrorAux(__FILE__,__LINE__, #value, value)

/**
 * CUDA kernel that computes reciprocal values for a given vector
 */
__global__ void reciprocalKernel(float *data, unsigned vectorSize) {
	unsigned idx = blockIdx.x*blockDim.x+threadIdx.x;
	if (idx < vectorSize)
		data[idx] = 1.0/data[idx];
}

const int HMM =1 ;

/**
 * Host function that copies the data and launches the work on GPU
 */
float *gpuReciprocal(float *data, unsigned size)
{
	float *rc = new float;
	memset (rc, 0, size*sizeof(float));
	float *gpuData;

	if (HMM)
	{
		gpuData = static_cast<float*>( calloc( size, sizeof(float)));
		memcpy(gpuData, data, sizeof(float)*size);

	}
	else
	{
		CUDA_CHECK_RETURN(cudaMalloc((void **)&gpuData, sizeof(float)*size));
		CUDA_CHECK_RETURN(cudaMemcpy(gpuData, data, sizeof(float)*size, cudaMemcpyHostToDevice));
	}
	static const int BLOCK_SIZE = 256;
	const int blockCount = (size+BLOCK_SIZE-1)/BLOCK_SIZE;
	reciprocalKernel<<<blockCount, BLOCK_SIZE>>> (gpuData, size);

	CUDA_CHECK_RETURN( cudaPeekAtLastError() );
	CUDA_CHECK_RETURN(  cudaDeviceSynchronize() );

	if(HMM)
	{
		memcpy(rc, gpuData, sizeof(float)*size);
		free(gpuData);
	}
	else
	{
		CUDA_CHECK_RETURN(cudaMemcpy(rc, gpuData, sizeof(float)*size, cudaMemcpyDeviceToHost));
		CUDA_CHECK_RETURN(cudaFree(gpuData));
	}

	return rc;
}

float *cpuReciprocal(float *data, unsigned size)
{
	float *rc = new float;
	for (unsigned cnt = 0; cnt < size; ++cnt) rc[cnt] = 1.0/data[cnt];
	return rc;
}

void initialize(float *data, unsigned size)
{
	for (unsigned i = 0; i < size; ++i)
		data[i] = .5*(i+1);
}

int main(void)
{
	static const int WORK_SIZE = 65530;
	float *data = new float[WORK_SIZE];

	initialize (data, WORK_SIZE);

	float *recCpu = cpuReciprocal(data, WORK_SIZE);
	float *recGpu = gpuReciprocal(data, WORK_SIZE);
	float cpuSum = std::accumulate (recCpu, recCpu+WORK_SIZE, 0.0);
	float gpuSum = std::accumulate (recGpu, recGpu+WORK_SIZE, 0.0);

	/* Verify the results */
	std::cout<<"gpuSum = "<<gpuSum<< " cpuSum = " <<cpuSum<<std::endl;

	/* Free memory */
	delete[] data;
	delete[] recCpu;
	delete[] recGpu;

	return 0;
}

/**
 * Check the return value of the CUDA runtime API call and exit
 * the application if the call has failed.
 */
static void CheckCudaErrorAux (const char *file, unsigned line, const char *statement, cudaError_t err)
{
	if (err == cudaSuccess)
		return;
	std::cerr << statement<<" returned " << cudaGetErrorString(err) << "("<<err<< ") at "<<file<<":"<<line << std::endl;
	exit (1);
}

Hi,

Did anyone successfully run HMM in NVIDIA driver and Linux with the latest NVIDIA driver on Pascal or other GPUs? If you did, which GPU/ driver version/ Linux version did you use?

Thanks,
Kevin

It looks like HMM is still not supported as of the 440 driver.
HMM is turned off by default and the function to implement this in uvm8_hmm.c is left as a TODO.

static void mirror_sync_cpu_device_pagetables(struct hmm_mirror *mirror, enum hmm_update_type update, unsigned long start, unsigned long end)
{
// TODO: Bug 1750144: Implement this
}

static const struct hmm_mirror_ops mirror_ops = {
.sync_cpu_device_pagetables = &mirror_sync_cpu_device_pagetables,
};

Given that this was originally posted about 3 years ago now, is this functionality planned for a future version of the driver? Getting HMM support in the driver would be awesome!