Dear Mat,
NVHPC 23.7 has just been released, and I have already done some testing with regard to the bugs we reported over the last few months. Many of them are fixed now, which is great!
However, I noticed one small change in behavior and would like to know whether it is intended.
Up until now, we have been using the following commands to compile an application and add OMPT support to it when we wanted to instrument it (simplified, of course).
$ nvc -mp=ompt shared.c -shared -o libshared.so # This is how Score-P currently handles OMPT
$ nvc -mp main.c -c
$ nvc -mp=ompt main.o libshared.so
While this still works in NVHPC 23.7, the application behaves differently if we also use -mp=ompt to compile main.c. To illustrate this, we can look at the following example:
$ cat main.c
#include <omp-tools.h>
#include <stdio.h>
/* OMPT entry point used to finalize the tool. Defined in shared.c, where
 * ompt_initialize assigns it from lookup( "ompt_finalize_tool" ). */
extern ompt_finalize_tool_t finalize_tool;
/* Identity helper: returns its argument unchanged. It only exists so the
 * work-sharing loop in main() has something to call. */
int
foo( int i )
{
    int value = i;

    return value;
}
int
main( void )
{
#pragma omp parallel
{
#pragma omp for
for ( int i = 1; i < 5; i++ )
{
foo( i );
}
}
finalize_tool();
return 1;
}
$ cat shared.c
#include <omp-tools.h>
#include <stdlib.h>
#include <stdio.h>
/* Set to 1 by callback_ompt_work once a work callback with endpoint
 * ompt_scope_end has been received; read by ompt_finalize. */
static int ws_loop_end_reached = 0;
/* Set to 1 at the end of ompt_initialize, after both entry-point lookups and
 * the callback registration succeeded; read by ompt_finalize. */
static int initialized = 0;
/* Result of lookup( "ompt_finalize_tool" ), assigned in ompt_initialize.
 * Not static because main.c declares it extern and calls it. */
ompt_finalize_tool_t finalize_tool;
/*
 * OMPT work callback. Records that the end of a work region was reported.
 *
 * Only `endpoint` is examined; `work_type` is not checked, so the end of any
 * work construct delivered to this callback sets the flag. All other
 * arguments are ignored.
 */
void
callback_ompt_work( ompt_work_t           work_type,
                    ompt_scope_endpoint_t endpoint,
                    ompt_data_t*          parallel_data,
                    ompt_data_t*          task_data,
                    uint64_t              count,
                    const void*           codeptr_ra )
{
    if ( endpoint != ompt_scope_end )
    {
        return;
    }

    ws_loop_end_reached = 1;
}
/*
 * Tool initializer handed to the OpenMP runtime via ompt_start_tool.
 *
 * Looks up the runtime entry points this tool needs and registers the work
 * callback. Every failure terminates the process immediately with a
 * distinct exit status:
 *   3 - lookup of ompt_set_callback failed
 *   4 - lookup of ompt_finalize_tool failed
 *   5 - the work callback was not registered with ompt_set_always
 *
 * Returns 1 on success (non-zero indicates success to the OMPT runtime).
 * `initial_device_num` and `tool_data` are unused.
 */
static int
ompt_initialize( ompt_function_lookup_t lookup,
                 int                    initial_device_num,
                 ompt_data_t*           tool_data )
{
    const ompt_set_callback_t register_callback =
        ( ompt_set_callback_t )lookup( "ompt_set_callback" );
    if ( !register_callback )
    {
        /* Tool got initialized but ompt_set_callback could not be looked up. */
        _Exit( 3 );
    }

    finalize_tool = ( ompt_finalize_tool_t )lookup( "ompt_finalize_tool" );
    if ( !finalize_tool )
    {
        /* Tool got initialized but ompt_finalize_tool could not be looked up. */
        _Exit( 4 );
    }

    const ompt_set_result_t status =
        register_callback( ompt_callback_work, ( ompt_callback_t )callback_ompt_work );
    if ( status != ompt_set_always )
    {
        /* Tool got initialized but the work callback could not be registered. */
        _Exit( 5 );
    }

    initialized = 1;
    return 1;
}
/*
 * Tool finalizer handed to the OpenMP runtime via ompt_start_tool.
 *
 * If the tool was initialized, the process is terminated here and the exit
 * status carries the test result:
 *   0 - initialized, finalized, and a work-end callback was seen
 *   2 - initialized and finalized, but no work-end callback was seen
 * If the tool was not initialized, the function simply returns.
 * `tool_data` is unused.
 */
static void
ompt_finalize( ompt_data_t* tool_data )
{
    if ( initialized != 1 )
    {
        return;
    }

    const int exit_status = ( ws_loop_end_reached == 1 ) ? 0 : 2;
    _Exit( exit_status );
}
/*
 * OMPT tool entry point. Announces the tool on stdout and returns the
 * initialize/finalize callback pair.
 *
 * The returned structure has static storage duration, so the pointer stays
 * valid after this function returns. Both parameters are unused.
 */
ompt_start_tool_result_t*
ompt_start_tool( unsigned int omp_version, /* == _OPENMP */
                 const char*  runtime_version )
{
    static ompt_start_tool_result_t ompt_start_tool_result = { &ompt_initialize,
                                                               &ompt_finalize,
                                                               ompt_data_none };

    printf( "Start tool\n" );
    /* Every exit path of this tool uses _Exit(), which is not required to
     * flush stdio buffers. Flush now so the message is not lost when stdout
     * is a pipe or a file rather than a line-buffered terminal. */
    fflush( stdout );

    return &ompt_start_tool_result;
}
$ nvc -mp=ompt shared.c -shared -o libshared.so # This is how Score-P currently handles OMPT
$ nvc -mp main.c -c
$ nvc -mp=ompt main.o libshared.so
$ ./a.out; echo $?
Start tool
2
$ nvc -mp=ompt,multicore main.c -c
$ nvc -mp=ompt main.o libshared.so
$ ./a.out; echo $?
Start tool
0
Without adding -mp=ompt to the compile flags, we see the work-end issue that was also present in previous NVHPC versions: the work callback is never invoked with ompt_scope_end, so the program exits with status 2. If we add the flag, the issue is gone. My question is: Is this behavior intended, and going forward, should we expect that -mp=ompt must also be added to the compile flags in order to use OMPT with user applications?