Access Violation caused by "cudaGraphicsMapResources"

Hello,
I am trying to run the following code, but cudaGraphicsMapResources causes an access violation. I don’t know why this happens. What could be the cause?

Here is the code:

#include "cuda_runtime.h"
		#include "device_launch_parameters.h"

		#include <stdio.h>


		#include <GL\glew.h>
		#include <GL\freeglut.h>


		#include "cuda_gl_interop.h"


        #include <iostream>
		
     


		/*  Create checkerboard texture  */
		const int N = 1024;
		#define checkImageWidth 1024
		#define checkImageHeight 1024
		#define SIZE_X 1024
		#define SIZE_Y 1024
		static GLubyte checkImage[1024*1024];
		/*static GLubyte checkImage[1024][1024][1];*/
		static GLuint texName;
		// Texture reference for 2D float texture
		float tex[1024][1024];
		
	
	
		size_t dsize = 1024 * 1024 * sizeof(float);
		
		struct mystruct{

			int x;
			int y;

		};







		void makeCheckImage(void)
		{
			int i, j, c;

			for (i = 0; i < 600; i++) {
				for (j = 0; j < 800; j++) {
					c = ((((i % 2) == 0)) ^ (j % 2 == 0)) * 255;
					checkImage[j + i*1024]  = (GLubyte)c;


				}
			}
		}




		__global__ void cudaMakeCheckImage(float *d_A)
		{

			int col = blockIdx.x * blockDim.x + threadIdx.x;
			int row = blockIdx.y * blockDim.y + threadIdx.y;


			int index = col + row * N;
			if (col < N && row < N)
			{
				d_A[index] = 255;
			}
		}
		



		void init(void)
		{
			glClearColor(0.0, 0.0, 0.0, 0.0);
			glShadeModel(GL_FLAT);
			glEnable(GL_DEPTH_TEST);
		
			
			
			
			glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

			makeCheckImage();
			glGenTextures(1, &texName);
			glBindTexture(GL_TEXTURE_2D, texName);
			// set basic parameters
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_NEAREST);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_NEAREST);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER,GL_NEAREST);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,GL_NEAREST);
			// Create texture data 
			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, checkImageWidth,
				checkImageHeight, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE,
				NULL);
			
			// Unbind the texture
			glBindTexture(GL_TEXTURE_2D, 0);

			
			cudaGraphicsResource* Res;

			// Allocate CUDA array in device memory
			cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0,
				cudaChannelFormatKindFloat);

			cudaArray* CuArr;

			cudaMallocArray(&CuArr, &channelDesc, 1024, 1024);
			dim3 block(32, 32);
			dim3 grid;
			grid.x = (N + block.x - 1) / block.x;
			grid.y = (N + block.y - 1) / block.y;
			float *host = new float[N*N];
			float *host_dA = new float[N*N];
			float *d_A;

			cudaMalloc((void**)&d_A, (N * N)*sizeof(float));
			cudaMakeCheckImage<<<grid, block>>>(d_A);

		
			cudaMemcpy(host_dA, d_A, N * N * sizeof(float), cudaMemcpyDeviceToHost);
			std::cout << "Wrote Matrix to host memory" << std::endl;
			std::cout << host_dA [0] << " , " << host_dA [1] << " , " << host_dA [2] << " , " << host_dA[3] << " , " << host_dA[4] << " , " << host_dA[5] << std::endl;
			std::cout << host_dA[1024] << " , " << host_dA[1025] << " , " << host_dA[1026] << " , " << host_dA[1027] << " , " << host_dA[1028] << " , " << host_dA[1029] << std::endl;

			



			cudaError_t eError = cudaGraphicsGLRegisterImage(&Res, texName, GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard);
			std::cout << "L1" << std::endl;
			cudaError_t eError2 = cudaGraphicsMapResources(1, &Res, 0);
			std::cout << "L2" << std::endl;
			cudaMemcpy2DToArray(CuArr, 0, 0, d_A, 1024, 1024, 1024, cudaMemcpyDeviceToDevice);
			std::cout << "L3" << std::endl;
			cudaError_t eError3 =  cudaGraphicsSubResourceGetMappedArray(&CuArr, Res, 0, 0);
			std::cout << "L4" << std::endl;
			cudaError_t eError4 = cudaGraphicsUnmapResources(1, &Res, 0);

			cudaMemcpy(host, CuArr, N * N * sizeof(float), cudaMemcpyDeviceToHost);
			std::cout << "Write CudaArray to host memory" << std::endl;
			std::cout << host[0] << " , " << host[1] << " , " << host[2] << " , " << host[3] << " , " << host[4] << " , " << host[5] << std::endl;
			std::cout << host[1024] << " , " << host[1025] << " , " << host[1026] << " , " << host[1027] << " , " << host[1028] << " , " << host[1029] << std::endl;
			cudaFree(d_A);
			delete[] host_dA;
			delete[] host;

		}

		void display(void)
		{
			glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
			glEnable(GL_TEXTURE_2D);
			glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL);
			glBindTexture(GL_TEXTURE_2D, texName); /* binds texname wit active textureunit   */
			glBegin(GL_QUADS);
			glTexCoord2f(1.0 * 800 / 1024, 1.0 * 600 / 1024);  glVertex2f(1.0, 1.0);
			glTexCoord2f(1.0 * 800 / 1024, 0.0);  glVertex2f(1.0, -1.0);
			glTexCoord2f(0.0, 0.0); glVertex2f(-1.0, -1.0);
			glTexCoord2f(0.0, 1.0 * 600 / 1024); glVertex2f(-1.0, 1.0);

	
			glEnd();
			glFlush();
			glBindTexture(GL_TEXTURE_2D, 0); /*  unbinds texname with active textureunit  ?? */
			glDisable(GL_TEXTURE_2D);
		}





		void keyboard(unsigned char key, int x, int y)
		{
			switch (key) {
			case 27:
				exit(0);
				break;
			default:
				break;
			}
		}

		__global__ void matrix(float *d_A)
		{
			int col = blockIdx.x * blockDim.x + threadIdx.x;
			int row = blockIdx.y * blockDim.y + threadIdx.y;


			int index = col + row * N;
			if (col < N && row < N)
			{
				d_A[index] = 255;
			}
		}

		int main(int argc, char** argv)
		{
		
			
			float *d_B;

			cudaMalloc((void**)&d_B, (N * N)*sizeof(float));

			cudaFree(d_B);
		
			
			glutInit(&argc, argv);
			glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH | GLUT_BORDERLESS | GLUT_CAPTIONLESS);
			glutInitWindowSize(800, 600);
			glutInitWindowPosition(100, 100);
			glutCreateWindow(argv[0]);
			cudaSetDevice(0);
			cudaGLSetGLDevice(0);

			init();
	
			glutDisplayFunc(display);
			
			glutKeyboardFunc(keyboard);

		

			glutMainLoop();
			return 0;
		}

You are storing the error codes in cudaError_t variables but not doing anything with them.
After line 142, add:

printf("CUDA error: %s\n", cudaGetErrorString(eError2));

It will tell you the error message behind the return code of the cudaGraphicsMapResources() call, since that seems to be the one causing the problem.

You can also have a look here, especially Njuffa’s reply:
https://devtalk.nvidia.com/default/topic/525246/is-cudasafecall-no-longer-needed-/

Add the macro to your source file (or any header you might have for your custom/personal definitions), then wrap your CUDA calls (not kernel launches) with it. If there is an error when calling a particular API function, it will show you exactly where the error occurred and what it is.
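
For reference, a minimal version of such a macro might look like this (just a sketch along those lines, not the exact code from the linked thread):

#include <cstdio>
#include <cstdlib>
#include "cuda_runtime.h"

// Wrap CUDA runtime API calls (not kernel launches) with this macro.
// On failure it prints the file, line and CUDA error string, then exits.
#define CUDA_SAFE_CALL(call)                                                \
    do {                                                                    \
        cudaError_t err__ = (call);                                         \
        if (err__ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error at %s:%d: %s\n",                    \
                    __FILE__, __LINE__, cudaGetErrorString(err__));         \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Usage:
//   CUDA_SAFE_CALL(cudaGraphicsMapResources(1, &Res, 0));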

Try this and see what you get.

Thank you for your reply! The access violation vanished after defining the graphics resource outside the init() function. Unfortunately there are still errors. I think I am doing a bunch of things wrong in the part where I register the image. Do you have an idea what I have to change? This is all new to me and I would be really glad for any advice. If the program worked, one would see a grey screen, but the screen is black. So I think the data in the CUDA array is not handed over to the texture by OpenGL.

Also I think I haven’t understood how the program works, as I thought I had done everything right, which is obviously not the case…

The way I understand the program is as follows:
By calling “cudaGraphicsGLRegisterImage(&Res, texName, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone);”,
CUDA knows that it has to send the texture in “Res” to “texName” when “Res” is mapped.

The next command, “cudaGraphicsMapResources(1, &Res, 0);”,
means that there is one element in “Res” which is now ready to be mapped.

“cudaGraphicsSubResourceGetMappedArray(&CuArr, Res, 0, 0);”
means that the CUDA array “CuArr” is now part of “Res”. And as “Res” is registered with texName, texName is now filled with the data in “CuArr”.

The next command, “cudaGraphicsUnmapResources(1, &Res, 0);”, is needed to stop the mapping so that
OpenGL can bind the texture again with:

“glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, texName);”

I am sure I misunderstood something, but I don’t see where.
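
Condensed, this is the order of the interop calls in my init() (copied from the full code below, without the surrounding setup):

cudaMemcpy2DToArray(CuArr, 0, 0, d_A, pitch, N * sizeof(float), N, cudaMemcpyDeviceToDevice); // fill my own CuArr from d_A
cudaGraphicsGLRegisterImage(&Res, texName, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone);          // register texName with CUDA
cudaGraphicsMapResources(1, &Res, 0);                                                         // map the registered resource
cudaGraphicsSubResourceGetMappedArray(&CuArr, Res, 0, 0);                                     // get the array backing Res (into CuArr)
cudaGraphicsUnmapResources(1, &Res, 0);                                                       // unmap so OpenGL can use the texture again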

Implementing the error prints as you advised leads to:

“CUDA error: OS call failed or operation not supported on this OS”

Why is registering an image not supported on my OS (Windows 10 Professional)?

I have posted the complete changed code below. Do you get the same error message?

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <GL\glew.h>
#include <GL\freeglut.h>
#include "cuda_gl_interop.h"
#include <iostream>

const int N = 1024;
static GLuint texName;
size_t dsize = 1024 * 1024 * sizeof(float);
cudaGraphicsResource *Res;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray* CuArr;



__global__ void cudaMakeCheckImage(float *d_A)
{

	int col = blockIdx.x * blockDim.x + threadIdx.x;
	int row = blockIdx.y * blockDim.y + threadIdx.y;


	int index = col + row * N;
	if (col < N && row < N)
	{
		d_A[index] = 128;
	}
}



void init(float *d_A)
{
	glClearColor(0.0, 0.0, 0.0, 0.0);
	glShadeModel(GL_FLAT);
	glEnable(GL_DEPTH_TEST);




	glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
	glGenTextures(1, &texName);
	glBindTexture(GL_TEXTURE_2D, texName);

	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, N,N, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE,NULL);
	glDisable(GL_TEXTURE_2D);


	float *host = new float[N*N];
	float *host_dA = new float[N*N];
	size_t spitch, pitch;
	dim3 block(32, 32);
	dim3 grid;
	grid.x = (N + block.x - 1) / block.x;
	grid.y = (N + block.y - 1) / block.y;



	cudaMallocPitch((void**)&d_A, &pitch, N *sizeof(float), N);
	cudaMakeCheckImage<<<grid, block>>>(d_A);
	

	cudaMallocArray(&CuArr, &channelDesc, N, N);
	cudaMemcpy(host_dA, d_A, N * N * sizeof(float), cudaMemcpyDeviceToHost);

	std::cout << "Wrote matrix from device memory to host memory:" << std::endl;
	std::cout << host_dA[0] << " , " << host_dA[1] << " , " << host_dA[2] << " , " << host_dA[3] << " , " << host_dA[4] << " , " << host_dA[5] << std::endl;
	std::cout << host_dA[1024] << " , " << host_dA[1025] << " , " << host_dA[1026] << " , " << host_dA[1027] << " , " << host_dA[1028] << " , " << host_dA[1029] << std::endl;

	cudaMemcpy2DToArray(CuArr, 0, 0, d_A, pitch, N* sizeof(float), N, cudaMemcpyDeviceToDevice);


	cudaMemcpyFromArray(host, CuArr, 0, 0, N * N * sizeof(float), cudaMemcpyDeviceToHost);
	std::cout << "Wrote CudaArray to host memoryM" << std::endl;
	std::cout << host[0] << " , " << host[1] << " , " << host[2] <<  std::endl;
	std::cout << host[1024 * 1024 - 1] << " , " << host[1025] << " , " << host[1026] <<  std::endl;


	cudaError error = cudaGraphicsGLRegisterImage(&Res, texName, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone);
	printf("CUDA error: %s\n", cudaGetErrorString(error));
	cudaError_t eError2 = cudaGraphicsMapResources(1, &Res, 0);
	printf("CUDA error: %s\n", cudaGetErrorString(eError2));
	
	cudaError_t eError3 = cudaGraphicsSubResourceGetMappedArray(&CuArr, Res, 0, 0);
	printf("CUDA error: %s\n", cudaGetErrorString(eError3));

	cudaError_t eError4 = cudaGraphicsUnmapResources(1, &Res, 0);
	printf("CUDA error: %s\n", cudaGetErrorString(eError4));

	glEnable(GL_TEXTURE_2D);
	glBindTexture(GL_TEXTURE_2D, texName);

	cudaFree(d_A);
	delete[] host_dA;
	delete[] host;

}

void display(void)
{
	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glEnable(GL_TEXTURE_2D);
	glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL);
	glBindTexture(GL_TEXTURE_2D, texName); /* binds texname wit active textureunit   */
	glBegin(GL_QUADS);
	glTexCoord2f(1.0 * 800 / 1024, 1.0 * 600 / 1024);  glVertex2f(1.0, 1.0);
	glTexCoord2f(1.0 * 800 / 1024, 0.0);  glVertex2f(1.0, -1.0);
	glTexCoord2f(0.0, 0.0); glVertex2f(-1.0, -1.0);
	glTexCoord2f(0.0, 1.0 * 600 / 1024); glVertex2f(-1.0, 1.0);


	glEnd();
	glFlush();
	glBindTexture(GL_TEXTURE_2D, 0); /*  unbinds texname with active textureunit  ?? */
	glDisable(GL_TEXTURE_2D);
}





void keyboard(unsigned char key, int x, int y)
{
	switch (key) {
	case 27:
		exit(0);
		break;
	default:
		break;
	}
}

int main(int argc, char** argv)
{


	float *d_B;
	cudaMalloc((void**)&d_B, (N * N)*sizeof(float));
	


	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_SINGLE | GLUT_RGB | GLUT_DEPTH | GLUT_BORDERLESS | GLUT_CAPTIONLESS);
	glutInitWindowSize(800, 600);
	glutInitWindowPosition(100, 100);
	glutCreateWindow(argv[0]);
	cudaSetDevice(0);
	cudaGLSetGLDevice(0);

	init(d_B);

	glutDisplayFunc(display);

	glutKeyboardFunc(keyboard);


	
	glutMainLoop();
	cudaFree(d_B);
	return 0;
}

There is a possibility that OpenGL applications open on Intel graphics by default instead of nVidia (mostly on laptops, but some desktop systems with an Intel IGP might also be affected). As a result, resource sharing between CUDA and OpenGL would be impossible.

You could query the GL_VENDOR string once the OpenGL context is initialized to see whether it’s Intel Corp. or nVidia that’s providing the graphics. If it is Intel, use the nVidia control panel to force this application to run on the nVidia high-performance graphics.
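
For example, something along these lines right after the GL context is created (in your program, directly after glutCreateWindow()) would show which implementation you got:

// Query the OpenGL implementation once a context exists:
const GLubyte *vendor   = glGetString(GL_VENDOR);
const GLubyte *renderer = glGetString(GL_RENDERER);
printf("GL_VENDOR:   %s\n", vendor);
printf("GL_RENDERER: %s\n", renderer);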

Using glGetString(GL_VENDOR) results in “Microsoft Corporation” and glGetString(GL_RENDERER) gives “GDI Generic”. So you were right… CUDA and OpenGL can’t communicate because they are not on the same device. After forcing the GTX 1080 to render OpenGL for this program there was no change. What else could be done so that OpenGL is rendered on the NVIDIA GTX 1080?

I have found the problem now. I had connected with Windows Remote Desktop. Now I am using RealVNC, and the vendor is “NVIDIA Corporation” and the renderer is “GeForce GTX 1080/PCIe/SSE2”. Also, “cudaGraphicsGLRegisterImage” no longer throws an error. But the screen is still black instead of grey. Do you have an idea what could be wrong in my code?