Hello all,

I want to increase the register usage of a kernel, so I compiled the code below with the flag `--maxrregcount=16`:

```
/*
 * Transforms one vertex per thread.
 *
 * Each thread derives a flat index from the x components of threadIdx,
 * blockIdx and blockDim, reads (x, y, z) at that index from xIN/yIN/zIN and
 * writes the transformed coordinates to xOUT/yOUT/zOUT at the same index.
 * Threads whose index is >= NumVertices do nothing.
 *
 * tmIN is read at indices 0-2, 4-6, 8-10 and 12-14; indices 3, 7, 11 and 15
 * are never touched. NOTE(review): this looks like a column-major 4x4 matrix
 * with the last row ignored -- confirm against the host code.
 */
__global__ void kernel(float* xIN, float* yIN, float* zIN, float* tmIN,
                       int NumVertices,
                       float* xOUT, float* yOUT, float* zOUT)
{
    // Flat per-thread index (only the x dimension of the launch is used).
    const int vid = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the tail: surplus threads in the last block must not touch memory.
    if (vid >= NumVertices) {
        return;
    }

    const float vx = xIN[vid];
    const float vy = yIN[vid];
    const float vz = zIN[vid];

    // One output component per statement; each is a 3-term dot product plus
    // an additive term taken from tmIN[12..14].
    xOUT[vid] = tmIN[0] * vx + tmIN[4] * vy + tmIN[8]  * vz + tmIN[12];
    yOUT[vid] = tmIN[1] * vx + tmIN[5] * vy + tmIN[9]  * vz + tmIN[13];
    zOUT[vid] = tmIN[2] * vx + tmIN[6] * vy + tmIN[10] * vz + tmIN[14];
}
```

Compiling with `--ptxas-options=-v` reports:

ptxas info : Compiling entry function ‘_Z9transformPfS_S_S_iS_S_S_’ for ‘sm_13’

ptxas info : Used 8 registers, 64+16 bytes smem

But when I set `--maxrregcount=32` and also declare some extra variables in the kernel, as shown below, the register usage is still the same. Why?

```
/*
 * Transforms one vertex per thread, with a handful of extra scratch floats
 * declared at the top.
 *
 * Each thread derives a flat index from the x components of threadIdx,
 * blockIdx and blockDim, reads (x, y, z) at that index from xIN/yIN/zIN and
 * writes the transformed coordinates to xOUT/yOUT/zOUT at the same index.
 * Threads whose index is >= NumVertices write nothing.
 *
 * tmIN is read at indices 0-2, 4-6, 8-10 and 12-14; indices 3, 7, 11 and 15
 * are never touched. NOTE(review): this looks like a column-major 4x4 matrix
 * with the last row ignored -- confirm against the host code.
 */
__global__ void transform(float* xIN, float* yIN, float* zIN, float* tmIN,
                          int NumVertices,
                          float* xOUT, float* yOUT, float* zOUT)
{
    // Flat per-thread index (only the x dimension of the launch is used).
    const int vid = blockIdx.x * blockDim.x + threadIdx.x;

    // Extra scratch values: ten floats set to 4, summed into `a`, then `b` is
    // added once more. `a` is never read after this point and nothing derived
    // from these values is stored to memory, so an optimizing compiler is
    // free to discard them.
    register float b = 4, c = 4, d = 4, f = 4, r = 4,
                   w = 4, q = 4, s = 4, h = 4, n = 4;
    register float a = b + c + d + f + r + w + q + s + h + n;
    a += b;

    // Guard the tail: surplus threads in the last block must not touch memory.
    if (vid >= NumVertices) {
        return;
    }

    const float vx = xIN[vid];
    const float vy = yIN[vid];
    const float vz = zIN[vid];

    // One output component per statement; each is a 3-term dot product plus
    // an additive term taken from tmIN[12..14].
    xOUT[vid] = tmIN[0] * vx + tmIN[4] * vy + tmIN[8]  * vz + tmIN[12];
    yOUT[vid] = tmIN[1] * vx + tmIN[5] * vy + tmIN[9]  * vz + tmIN[13];
    zOUT[vid] = tmIN[2] * vx + tmIN[6] * vy + tmIN[10] * vz + tmIN[14];
}
```

ptxas info : Compiling entry function ‘_Z9transformPfS_S_S_iS_S_S_’ for ‘sm_13’

ptxas info : Used 8 registers, 64+16 bytes smem

Note: the code is compiled for arch sm_13 (compute capability 1.3).

Thanks for any help in this regard.