Hi
In the following code, I would like to collect the SM IDs that have executed the kernel. The kernel is a simple addition one and I have written this code:
__device__ uint get_smid(void) {
uint ret;
asm("mov.u32 %0, %smid;" : "=r"(ret) );
return ret;
}
__global__ void simpleAdd(float *v, int n, vector<int> &smVector)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n) {
int sm = get_smid();
smVector.push_back(sm);
v[i] = v[i] + 1;
}
}
...
int main()
{
...
simpleAdd<<<numBlocks, blockSize>>>(deviceVector, n, smVector);
...
}
But the error is that calling a host function from the device kernel is not allowed. I also tried this code to access vector elements by [] operator instead of push_back but get the same error.
__device__ uint get_smid(void) {
uint ret;
asm("mov.u32 %0, %smid;" : "=r"(ret) );
return ret;
}
__global__ void simpleAdd(float *v, int n, vector<int> &smVector)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < n) {
int sm = get_smid();
smVector[sm]++;
v[i] = v[i] + 1;
}
}
...
int main()
{
...
smVector.resize(68);
simpleAdd<<<numBlocks, blockSize>>>(deviceVector, n, smVector);
...
}
Apart from the original question, which I still have that, using %nsmid gives different result than %smid. For example, for a short array on device, the %nsmid shows SM_68 while %smid shows SM_0.