Can I get the number of Tensor cores of my GPU?

cnmcdee · December 14, 2022, 4:37pm

It should be added to `cudaGetDeviceProperties`, because that function already returns a LOT of information:

The properties I am dumping from the call:

I would suggest stepping through this code in a debugger: the struct might hold a Tensor core parameter in the latest toolkit if you step through it with a card that has Tensor cores. The 1660 does not.

/*
 * Queries the CUDA runtime for the number of devices and stores the
 * properties of device i in props[i].
 *
 * props        - caller-provided array that receives one cudaDeviceProp per
 *                device; it must be large enough for every device the runtime
 *                reports (this function cannot check its capacity).
 * device_count - received by value and overwritten with the count reported by
 *                cudaGetDeviceCount(); the value passed in is ignored and the
 *                result is not returned to the caller. The parameter is kept
 *                so existing call sites keep compiling.
 *
 * Terminates the process with exit(-1) when no device is available or when
 * the properties of a device cannot be read.
 */
void Cuda_device_poll(cudaDeviceProp* props, int device_count)
{
    device_count = 0;
    cudaError_t status = cudaGetDeviceCount(&device_count);
    if (status != cudaSuccess)
    {
        // Report the runtime's reason, then fall through to the
        // "no devices" exit path below.
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(status));
        device_count = 0;
    }

    if (device_count == 0)
    {
        printf("No cuda devices found exiting...\n");
        exit(-1);
    }

    if (device_count > 0)
    {
        // Announce the total once instead of once per device.
        printf("Found %d cuda device...\n", device_count);
        for (int t = 0; t < device_count; t++)
        {
            // Query device t (not always device 0) so each slot describes
            // its own card.
            status = cudaGetDeviceProperties(&props[t], t);
            if (status != cudaSuccess)
            {
                fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n", t, cudaGetErrorString(status));
                exit(-1);
            }
        }
    }
}

/*
 * Prints `label`, then `count` bytes as two-digit lowercase hex with a '-'
 * inserted between every `group` bytes, then a newline.
 */
static void Cuda_print_hex_bytes(const char* label, const char* bytes, int count, int group)
{
    printf("%s", label);
    for (int i = 0; i < count; i++)
    {
        if (i > 0 && i % group == 0)
        {
            printf("-");
        }
        // Convert through unsigned char so bytes >= 0x80 are not sign-extended,
        // and zero-pad so bytes < 0x10 keep their leading zero.
        printf("%02x", (unsigned char)bytes[i]);
    }
    printf("\n");
}

/*
 * Dumps a selection of cudaDeviceProp fields to stdout for every device.
 *
 * prop - array of device properties indexed by device ordinal; it must hold
 *        at least as many entries as Cuda_device_count() returns.
 */
void Cuda_print_properties(cudaDeviceProp* prop)
{
    int card_count  = Cuda_device_count();
    for (int t = 0; t < card_count; t++)
    {
        // Read through a reference; the struct is large and is only inspected.
        const cudaDeviceProp& a = prop[t];
        printf("Device: %d [%s] \n", t, a.name);

        // Identifiers are printed in groups of four bytes.
        Cuda_print_hex_bytes("               UUID: ", a.uuid.bytes, 16, 4);
        // NOTE(review): the CUDA docs describe luid / luidDeviceNodeMask as
        // meaningful only on Windows (non-TCC) - confirm before relying on them.
        Cuda_print_hex_bytes("               LUID: ", a.luid, 8, 4);

        printf(" luidDeviceNodeMask: %u\n", a.luidDeviceNodeMask);
        char metric[20] = {0};
        size_t total_mem = a.totalGlobalMem;
        // NOTE(review): `metric` is filled by char_to_NB but never printed here;
        // the call is kept because the helper is defined elsewhere - confirm
        // whether the formatted size was meant to be shown.
        char_to_NB(total_mem, metric);
        // totalGlobalMem is a size_t, so %zu is the matching conversion.
        printf("     totalGlobalMem: %zu\n", a.totalGlobalMem);
        printf("  sharedMemPerBlock: %zu\n", a.sharedMemPerBlock);
        printf("       regsPerBlock: %d\n", a.regsPerBlock);
        printf("           warpSize: %d\n", a.warpSize);
        printf("           memPitch: %zu\n", a.memPitch);
        printf("          clockRate: %d\n", a.clockRate);
        printf("    memoryClockRate: %d\n", a.memoryClockRate);
        printf("     memoryBusWidth: %d\n", a.memoryBusWidth);
        printf(" maxThreadsPerBlock: %d\n", a.maxThreadsPerBlock);
        printf("maxThreadsPerMultiP: %d\n", a.maxThreadsPerMultiProcessor);
        printf("ShrdMemPerMultiProc: %zu\n", a.sharedMemPerMultiprocessor);
        printf("regsPerMultiProcess: %d\n", a.regsPerMultiprocessor);
        printf("   maxThreadsDim(x): %d\n", a.maxThreadsDim[0]);
        printf("   maxThreadsDim(y): %d\n", a.maxThreadsDim[1]);
        printf("   maxThreadsDim(z): %d\n", a.maxThreadsDim[2]);
        printf("     maxGridSize(x): %d\n", a.maxGridSize[0]);
        printf("     maxGridSize(y): %d\n", a.maxGridSize[1]);
        // Index 2 is the z dimension.
        printf("     maxGridSize(z): %d\n", a.maxGridSize[2]);
        printf("      totalConstMem: %zu\n", a.totalConstMem);
        printf("              major: %d\n", a.major);
        printf("              minor: %d\n", a.minor);
        printf("   textureAlignment: %zu\n", a.textureAlignment);
        printf("      deviceOverlap: %d\n", a.deviceOverlap);
        printf("multiProcessorCount: %d\n", a.multiProcessorCount);
        printf("kernelExecTimeOEnab: %d\n", a.kernelExecTimeoutEnabled);
        printf("         integrated: %d\n", a.integrated);
        printf("   canMapHostMemory: %d\n", a.canMapHostMemory);
        printf("        computeMode: %d\n", a.computeMode);
        printf("       maxTexture1D: %d\n", a.maxTexture1D);
        printf(" maxTexture1DMipmap: %d\n", a.maxTexture1DMipmap);
        printf(" maxTexture1DLinear: %d\n", a.maxTexture1DLinear);
        printf("    surfaceAlignemt: %zu\n", a.surfaceAlignment);
        printf("  concurrentKernels: %d\n", a.concurrentKernels);
        printf("         ECCEnabled: %d\n", a.ECCEnabled);
        printf("           pciBusID: %d\n", a.pciBusID);
        printf("        pciDeviceID: %d\n", a.pciDeviceID);
        printf("        pciDomainID: %d\n", a.pciDomainID);
        printf("          tccDriver: %d\n", a.tccDriver);
        printf("   asyncEngineCount: %d\n", a.asyncEngineCount);
        printf(" streamPrioritiesSp: %d\n", a.streamPrioritiesSupported);
        printf("globalL1CacheSupprt: %d\n", a.globalL1CacheSupported);
        printf(" localL1CacheSupprt: %d\n", a.localL1CacheSupported);
        printf("\n");
    }
}