Cuda debugger API

Hey, I’m having some trouble with the CUDA debugger API (http://docs.nvidia.com/cuda/debugger-api/r_main.html#r_main). It works up to the point where it tries to run the kernel, at which point it freezes. When it freezes in the main thread, the event CUDBG_EVENT_INTERNAL_ERROR is added to the event queue with the error CUDBG_ERROR_INVALID_CONTEXT.

Is it possible that the error is caused by the CUDA kernel being launched in the same process that is using the debugger API?
What does an invalid context mean exactly?
Does it work on another machine?

Any help or ideas would be appreciated.

Code

#include <stdio.h>
#include <signal.h>
#include "cudadebugger.h"
#include "cuda_stdint.h"

#include <pthread.h>
#include <iostream>

CUDBGAPI cudbgAPI;

// Minimal test kernel: each thread that runs it prints "Hi" via device-side printf.
// Takes no arguments and touches no memory.
__global__ void helloWorld() {
	printf("Hi\n");
}

// Finalizes the debugger API (if it has been obtained) and terminates the
// process with exit code -1.
//
// This is required because if the program freezes you want to be able to
// exit without requiring a restart. The int parameter exists so the function
// can double as a signal handler.
//
// filler: signal number when invoked as a signal handler; ignored.
//
// NOTE(review): printf() and exit() are not async-signal-safe, so running
// this from a signal handler is formally undefined behavior.
void finalizeAndExit( int filler = 0)
{
	(void) filler;
	printf("\nExiting Safely\n");
	// cudbgAPI is a global pointer that stays null until cudbgGetAPI() has
	// filled it in; skip finalize() then instead of dereferencing null.
	if ( cudbgAPI != NULL )
	{
		cudbgAPI->finalize();
	}
	exit(-1);
}

// Aborts the program if a debugger API call failed.
//
// res: result code returned by a debugger API call. CUDBG_SUCCESS makes this
//      a no-op; any other value prints the matching error string in yellow
//      and leaves the process through finalizeAndExit().
void cudbgError( CUDBGResult res )
{
	if ( res == CUDBG_SUCCESS )
	{
		return;
	}

	// "\x1b[33m" switches the terminal foreground to yellow. "\x1b[0m" puts
	// the default attributes back so the colour does not leak into later
	// output or into the user's shell after the process exits.
	printf ( " \x1b[33m \nERROR: %s\x1b[0m\n\n", cudbgGetErrorString ( res ));
	finalizeAndExit();
}

// Drains the synchronous debugger event queue and prints one line per event.
//
// Repeatedly calls cudbgAPI->getNextEvent() on CUDBG_EVENT_QUEUE_TYPE_SYNC and
// returns as soon as a call does not report CUDBG_SUCCESS or hands back an
// event whose kind is CUDBG_EVENT_INVALID. Internal-error events also print
// the error string of the attached error code.
void printEvent()
{
	CUDBGEvent event;
	CUDBGResult res;
	for (res = cudbgAPI->getNextEvent( CUDBG_EVENT_QUEUE_TYPE_SYNC, &event);
		res == CUDBG_SUCCESS && event.kind != CUDBG_EVENT_INVALID;
		res = cudbgAPI->getNextEvent( CUDBG_EVENT_QUEUE_TYPE_SYNC, &event))
	{
		// CUDBG_EVENT_INVALID needs no case: the loop condition above
		// already stops before the body runs for such an event.
		switch (event.kind)
		{
			case CUDBG_EVENT_ELF_IMAGE_LOADED:
				printf("CUDBG_EVENT_ELF_IMAGE_LOADED\n");
				break;
			case CUDBG_EVENT_KERNEL_READY:
				printf("CUDBG_EVENT_KERNEL_READY\n");
				break;
			case CUDBG_EVENT_KERNEL_FINISHED:
				printf("CUDBG_EVENT_KERNEL_FINISHED\n");
				break;
			case CUDBG_EVENT_INTERNAL_ERROR:
				printf("CUDBG_EVENT_INTERNAL_ERROR	%s\n", cudbgGetErrorString( event.cases.internalError.errorType ) );
				break;
			case CUDBG_EVENT_CTX_PUSH:
				printf("CUDBG_EVENT_CTX_PUSH\n");
				break;
			case CUDBG_EVENT_CTX_POP:
				printf("CUDBG_EVENT_CTX_POP\n");
				break;
			case CUDBG_EVENT_CTX_CREATE:
				printf("CUDBG_EVENT_CTX_CREATE\n");
				break;
			case CUDBG_EVENT_CTX_DESTROY:
				printf("CUDBG_EVENT_CTX_DESTROY\n");
				break;
			case CUDBG_EVENT_TIMEOUT:
				printf("CUDBG_EVENT_TIMEOUT\n");
				break;
			case CUDBG_EVENT_ATTACH_COMPLETE:
				printf("CUDBG_EVENT_ATTACH_COMPLETE\n");
				break;
			case CUDBG_EVENT_DETACH_COMPLETE:
				printf("CUDBG_EVENT_DETACH_COMPLETE\n");
				break;
			case CUDBG_EVENT_ELF_IMAGE_UNLOADED:
				printf("CUDBG_EVENT_ELF_IMAGE_UNLOADED\n");
				break;
			default:
				// Report kinds this table does not know about (possibly
				// added by other API revisions) instead of dropping them.
				printf("Unhandled debugger event kind %d\n", (int) event.kind);
				break;
		}
	}
}
// pthread entry point that keeps draining and printing debugger events.
//
// null: thread argument; not used.
// Never returns: it calls printEvent() in an endless loop.
void *mannageEvent(void *null)
{
	for ( ;; )
	{
		printEvent();
	}
}

// Entry point: obtains and initializes the debugger API, starts the thread
// that prints debugger events, launches the test kernel, then shuts down
// through finalizeAndExit().
//
// argc, argv: unused.
// Returns -1 if the debugger API cannot be obtained; every other path leaves
// the process through finalizeAndExit().
int main(int argc, char const *argv[])
{
	(void) argc;
	(void) argv;

	//gets the api
	uint32_t major,minor,rev;
	CUDBGResult res = cudbgGetAPIVersion ( &major, &minor, &rev );
	if ( res == CUDBG_SUCCESS )
	{
		res = cudbgGetAPI( major, minor, rev, &cudbgAPI );
	}
	if ( res != CUDBG_SUCCESS || cudbgAPI == NULL )
	{
		// Report directly rather than through cudbgError(): that path ends in
		// cudbgAPI->finalize(), and there is no usable API pointer yet.
		printf( "ERROR: could not obtain the debugger API: %s\n", cudbgGetErrorString( res ) );
		return -1;
	}

	// finalizeAndExit() uses cudbgAPI, so register it as the SIGINT handler
	// only once the pointer is valid.
	signal( SIGINT, finalizeAndExit );

	cudbgError( cudbgAPI->initialize() );

	//starts thread to print out events
	pthread_t mannage_event_thread;
	int rc = pthread_create( &mannage_event_thread, NULL, mannageEvent, NULL);
	if ( rc != 0 )
	{
		printf( "ERROR: pthread_create failed with code %d\n", rc );
		finalizeAndExit();
	}

	//Causes the program to freeze
	// Launch configuration: 2 blocks, 2 threads per block, 2 bytes of dynamic
	// shared memory (the third launch argument).
	helloWorld<<<2,2,2>>>();

	// A launch does not return an error itself: configuration errors show up
	// in cudaGetLastError(), execution errors at the next synchronizing call.
	// The synchronization also gives device-side printf a chance to flush.
	cudaError_t cudaRes = cudaGetLastError();
	if ( cudaRes == cudaSuccess )
	{
		cudaRes = cudaDeviceSynchronize();
	}
	if ( cudaRes != cudaSuccess )
	{
		printf( "CUDA ERROR: %s\n", cudaGetErrorString( cudaRes ) );
	}

	//stop
	finalizeAndExit();

	return 0;
}

Console Output

CUDBG_EVENT_INTERNAL_ERROR	CUDBG_ERROR_INVALID_CONTEXT
CUDBG_EVENT_TIMEOUT

What I would expect from a successful run is the event CUDBG_EVENT_ELF_IMAGE_LOADED followed by CUDBG_EVENT_KERNEL_READY (possibly with some context pushes or pops).

Did you ever figure this out (or does anybody else know how to use the debug API properly)? I’m running into the same issue.

I do not think I made much progress; however, I do recall that the issue may have been that I was trying to use the debugging API in and on the same process. If I recall correctly, I needed to attach to a subprocess in some way. You may want to look at how other debugging APIs work for inspiration.

> If I recall correctly, I needed to attach to a subprocess in some way.

Ah, that’s a bummer. Makes sense, but I had hoped to be able to use cuda-dbg to catch and maybe recover from device-side exceptions/assertions from within the main process.