What does the instance mean?

Dear experts,
What does "instance" mean in the kernel names shown in the screenshot?


Thank you very much!
Chen

[6/8] Executing 'cuda_gpu_kern_sum' stats report

 Time (%)  Total Time (ns)  Instances  Avg (ns)   Med (ns)   Min (ns)  Max (ns)   StdDev (ns)                                                  Name                                                
 --------  ---------------  ---------  ---------  ---------  --------  ---------  -----------  ----------------------------------------------------------------------------------------------------
     24.4    6,699,857,869    182,028   36,806.7   14,335.0     1,984  1,054,500     95,039.9  void BoxKernel<PhaseRelPerm::[lambda(int, int, int) (instance 12)]>(T1, int, int, int)              
     18.7    5,134,744,349     31,766  161,642.8  129,792.0     2,208  9,585,345    200,396.2  void BoxKernel<Saturation::[lambda(int, int, int) (instance 6)]>(T1, int, int, int)                 
     15.7    4,298,160,518     30,352  141,610.5   81,920.5     1,855    997,091    179,394.0  void BoxKernel<PhaseRelPerm::[lambda(int, int, int) (instance 28)]>(T1, int, int, int)              
     11.1    3,050,015,392      7,224  422,205.9    7,552.0     4,896  2,461,385    664,539.3  void BoxKernel<InitMatrix::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                 
      4.8    1,320,669,506     43,344   30,469.5   17,536.0     2,016    958,052     71,472.5  void BoxKernel<PhaseRelPerm::[lambda(int, int, int) (instance 18)]>(T1, int, int, int)              
      4.2    1,156,840,542      7,224  160,138.5  120,560.5     1,856    821,954    182,035.6  void BoxKernel<Saturation::[lambda(int, int, int) (instance 8)]>(T1, int, int, int)                 
      2.7      751,635,231      7,224  104,047.0   61,168.5     1,856    764,482    152,463.2  void BoxKernel<PhaseRelPerm::[lambda(int, int, int) (instance 34)]>(T1, int, int, int)              
      2.5      680,079,555     19,812   34,326.6    4,608.0     1,952    418,177     59,140.2  void BoxKernel<NlFunctionEval::[lambda(int, int, int) (instance 11)]>(T1, int, int, int)            
      1.8      495,709,953      1,652  300,066.6  282,801.0   231,552  5,307,986    157,392.9  void BoxKernel<NlFunctionEval::[lambda(int, int, int) (instance 9)]>(T1, int, int, int)             
      1.8      492,429,031      9,912   49,680.1    3,200.0     1,984  1,457,125    107,751.8  void BoxKernel<NlFunctionEval::[lambda(int, int, int) (instance 14)]>(T1, int, int, int)            
      1.5      410,194,714     82,491    4,972.6    4,800.0     3,648    158,529      1,775.4  void BoxKernel<PhaseRelPerm::[lambda(int, int, int) (instance 11)]>(T1, int, int, int)              
      1.4      388,461,303      9,906   39,214.7    3,552.0     2,048    658,402     59,364.2  void BoxKernel<OverlandFlowEvalKin::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)        
      1.1      304,815,092      1,652  184,512.8  106,272.0    15,296  1,438,469    199,426.0  void BoxKernel<NlFunctionEval::[lambda(int, int, int) (instance 6)]>(T1, int, int, int)             
      1.0      285,286,799      1,652  172,691.8   78,768.5    20,992  1,541,638    195,979.1  void BoxKernel<NlFunctionEval::[lambda(int, int, int) (instance 4)]>(T1, int, int, int)             
      1.0      269,461,801      1,213  222,144.9  223,905.0     2,592  2,846,570    219,022.1  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 16)]>(T1, int, int, int)      
      0.9      238,020,618      1,652  144,080.3  144,064.0    73,921  2,632,169     93,170.9  void BoxKernel<NlFunctionEval::[lambda(int, int, int) (instance 2)]>(T1, int, int, int)             
      0.8      213,412,690      9,288   22,977.2    3,616.0     2,016    582,242     60,741.8  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 6)]>(T1, int, int, int)       
      0.5      141,831,944        516  274,868.1  236,097.5   202,368  3,089,195    143,570.0  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 5)]>(T1, int, int, int)       
      0.4      113,716,999     27,009    4,210.3    3,968.0     3,903    161,056      2,444.0  void BoxKernel<SaturationOutputStatic::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)     
      0.4      103,506,178      3,096   33,432.2    3,328.0     2,016  1,076,740     51,078.6  void BoxKernel<OverlandFlowEvalKin::[lambda(int, int, int) (instance 3)]>(T1, int, int, int)        
      0.4      101,356,587      1,652   61,353.9   72,176.0    14,048    592,802     56,260.7  void BoxKernel<PhaseSource::[lambda(int, int, int) (instance 2)]>(T1, int, int, int)                
      0.4       96,080,701        515  186,564.5  194,529.0   117,472    245,249     23,900.1  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 17)]>(T1, int, int, int)      
      0.3       77,048,206      5,190   14,845.5   14,784.0     9,568     15,904        813.5  void DotKernel<ReduceSumType<double>, PFVDotProd::[lambda(int, int, int) (instance 1)], double>(T2,…
      0.2       65,831,003        516  127,579.5  125,584.5   107,904  1,755,846     72,422.1  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 2)]>(T1, int, int, int)       
      0.2       60,632,243      4,756   12,748.6   13,280.0     7,264     14,912      1,328.6  void BoxKernel<PFVProd::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                    
      0.2       59,162,949      7,232    8,180.7    7,808.0     4,479  1,594,054     20,762.7  void BoxKernel<InitVectorAll::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)              
      0.2       57,316,871      3,096   18,513.2    3,520.0     2,048    427,233     44,525.1  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 11)]>(T1, int, int, int)      
      0.2       50,948,010      6,192    8,228.0    3,072.0     1,856     78,689     10,294.6  void BoxKernel<RichardsJacobianEval::[lambda(int, int, int) (instance 8)]>(T1, int, int, int)       
      0.2       41,255,955      9,906    4,164.7    3,328.0     1,984      9,664      2,113.3  void BoxKernel<OverlandFlowEvalKin::[lambda(int, int, int) (instance 2)]>(T1, int, int, int)        
      0.1       29,460,694      2,498   11,793.7   11,392.0     7,328     15,040      1,049.1  void BoxKernel<PFVAxpy::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                    
      0.1       28,819,714      1,748   16,487.3   16,736.0    10,465     17,408        752.3  void DotKernel<ReduceSumType<double>, PFVWL2Norm::[lambda(int, int, int) (instance 1)], double>(T2,…
      0.1       27,373,806      1,548   17,683.3   17,505.0    11,808     23,008        923.0  void DotKernel<ReduceMaxType<double>, PFVMaxNorm::[lambda(int, int, int) (instance 1)], double>(T2,…
      0.1       24,807,803      1,968   12,605.6   13,056.0     7,456     13,505        964.8  void BoxKernel<PFVDiv::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                     
      0.1       21,959,758      2,271    9,669.6    8,416.0     5,280  1,567,589     44,997.3  void BoxKernel<PhaseDensity::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)               
      0.1       21,541,415      2,684    8,025.9    8,064.0     5,120      8,800        413.9  void BoxKernel<PFVConstInit::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)               
      0.1       18,812,830      1,548   12,153.0   10,528.0     6,912  1,386,533     34,976.7  void BoxKernel<PFVAbs::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                     
      0.1       16,473,991      4,137    3,982.1    3,968.0     2,687      8,672        225.7  void BoxKernel<InitVector::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                 
      0.1       15,719,631      1,036   15,173.4   15,200.0    11,936     15,776        407.0  void BoxKernel<PFVScaleDiff::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)               
      0.1       14,570,773      1,036   14,064.5   14,144.0     9,599     14,273        547.8  void DotKernel<ReduceSumType<double>, PFVL1Norm::[lambda(int, int, int) (instance 1)], double>(T2, …
      0.0       11,287,317      1,036   10,895.1   10,944.0     7,360     11,456        419.1  void BoxKernel<PFVLin1::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                    
      0.0       10,742,270        932   11,526.0   10,688.0     7,105     13,249      1,235.2  void BoxKernel<PFVInv::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                     
      0.0        6,597,653        516   12,786.1   12,801.0     9,632     13,248        355.0  void BoxKernel<PFVNeg::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                     
      0.0        6,118,700        416   14,708.4   14,783.0    11,648     15,232        364.6  void BoxKernel<PFVDiff::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                    
      0.0        5,630,395        516   10,911.6   10,944.0     7,520     11,360        370.7  void BoxKernel<PFVSum::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                     
      0.0        5,022,546        520    9,658.7    9,727.0     6,592     10,208        425.6  void BoxKernel<PFVScaleBy::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)                 
      0.0        4,330,326        517    8,375.9    8,416.0     5,952      8,896        310.4  void BoxKernel<PhaseDensity::[lambda(int, int, int) (instance 2)]>(T1, int, int, int)               
      0.0        3,540,977        200   17,704.9   17,728.0    12,288     18,528        606.4  void DotKernel<ReduceMinType<double>, PFVMin::[lambda(int, int, int) (instance 2)], double>(T2, T3,…
      0.0        1,048,862        200    5,244.3    4,896.0     3,199      6,048        608.8  void DotKernel<ReduceSumType<double>, PFVMin::[lambda(int, int, int) (instance 1)], double>(T2, T3,…
      0.0          593,954          2  296,977.0  296,977.0   291,745    302,209      7,399.2  void BoxKernel<Copy::[lambda(int, int, int) (instance 1)]>(T1, int, int, int)

I’m told that this originates from C++ name mangling, not from Nsight:

"Specifically, it relates to the lambda used to define the template. I’m pretty sure anonymous lambdas with the same type signature are assigned an instance number so they can still be uniquely identified in debugging and such."

“It appears that when you make multiple lambdas in the same scope with the same type, it tacks on an incrementing integer to keep their types unique:”

1 Like

Thank you very much, Carl!