Hi Gupadhyaya!!
Sorry, it was my fault.
It was actually a stack-size problem within the thread; increasing the thread's stack size solved it.
This was the sample script I used:
#!/usr/bin/env micropython
import ffi
import sys
import time as clock
import uctypes as ctypes
import _thread as thread
# Size in bytes of the scratch buffer handed to nvmlDeviceGetName.
MAXSTRINGLEN = 256

# uctypes descriptor: a single unsigned 64-bit field named "value" at offset 0.
pointer_t = {"value": ctypes.UINT64 | 0}

# Handle to the NVML shared library; every binding below is resolved from it.
libml = ffi.open("libnvidia-ml.so")

# Bindings are declared as libml.func(return type, symbol name, argument types).

# Library life cycle and error reporting.
nvmlInit = libml.func("i", "nvmlInit", "")
nvmlShutdown = libml.func("i", "nvmlShutdown", "")
nvmlErrorString = libml.func("s", "nvmlErrorString", "i")

# Device enumeration.
nvmlDeviceGetCount = libml.func("i", "nvmlDeviceGetCount", "p")
nvmlDeviceGetHandleByIndex = libml.func("i", "nvmlDeviceGetHandleByIndex", "ip")

# Per-device queries.
nvmlDeviceGetName = libml.func("i", "nvmlDeviceGetName", "ppi")
nvmlDeviceGetPowerUsage = libml.func("i", "nvmlDeviceGetPowerUsage", "pp")
def new(sdesc):
    """Return a native-layout uctypes struct for *sdesc* over fresh storage.

    The backing storage is a new bytearray of ``ctypes.sizeof(sdesc)``
    bytes, so every field starts out as zero.
    """
    size = ctypes.sizeof(sdesc)
    storage = bytearray(size)
    # NOTE(review): the struct is built from the storage's address only;
    # confirm the storage stays alive for as long as the struct is used.
    address = ctypes.addressof(storage)
    return ctypes.struct(address, sdesc, ctypes.NATIVE)
def thread_start(device):
    """Query one GPU's power usage and print it; used as a thread entry point.

    device -- struct whose ``value`` field holds the NVML device handle
              (as filled in by nvmlDeviceGetHandleByIndex).

    On failure the NVML error text is printed and no reading is shown.
    """
    # nvmlDeviceGetPowerUsage stores its result in an ``unsigned int``, so
    # read it back through a 32-bit field. Reading it through the 64-bit
    # pointer_t only gives the right number on little-endian machines.
    power = new({'value': ctypes.UINT32 | 0})
    res = nvmlDeviceGetPowerUsage(device.value, power)
    if res != 0:
        print("nvmlDeviceGetPowerUsage:", nvmlErrorString(res))
        # The call produced no reading; don't print the untouched zero.
        return
    print(power.value)
# Main script: initialise NVML, list every GPU by name, sample each GPU's
# power usage in its own thread, then shut NVML down. Each failure has its
# own exit status (6-10) so the failing step can be identified.


def _run_and_signal(dev, done):
    """Thread entry point: run thread_start(dev), then release *done*."""
    try:
        thread_start(dev)
    finally:
        # Always release, so the main thread never waits forever.
        done.release()


res = nvmlInit()
if res != 0:
    print("nvmlInit:", nvmlErrorString(res))
    sys.exit(6)

# nvmlDeviceGetCount stores an ``unsigned int``; read it back through a
# 32-bit field rather than the 64-bit pointer_t.
numDevices = new({'value': ctypes.UINT32 | 0})
res = nvmlDeviceGetCount(numDevices)
if res != 0:
    print("nvmlDeviceGetCount:", nvmlErrorString(res))
    sys.exit(7)

# The default thread stack was too small for the NVML call made in the
# worker. The setting applies to threads created afterwards, so set it once.
thread.stack_size(1048576)

pending = []  # one held lock per running worker
for i in range(numDevices.value):
    device = new(pointer_t)
    res = nvmlDeviceGetHandleByIndex(i, device)
    if res != 0:
        print("nvmlDeviceGetHandleByIndex:", nvmlErrorString(res))
        sys.exit(8)

    devname = bytearray(MAXSTRINGLEN)
    res = nvmlDeviceGetName(device.value, ctypes.addressof(devname), len(devname))
    if res != 0:
        print("nvmlDeviceGetName:", nvmlErrorString(res))
        sys.exit(9)
    # The name is NUL-terminated inside a fixed-size buffer; drop the padding.
    print("GPU(%d): %s" % (i, devname.decode().rstrip('\0')))

    done = thread.allocate_lock()
    done.acquire()
    pending.append(done)
    thread.start_new_thread(_run_and_signal, (device, done))

# Wait for every worker before shutting NVML down. A fixed sleep could
# return while a worker is still inside an NVML call.
for done in pending:
    done.acquire()

res = nvmlShutdown()
if res != 0:
    print("nvmlShutdown:", nvmlErrorString(res))
    sys.exit(10)
sys.exit(0)
This could be translated to C as follows:
/**
* Originally sourced from https://github.com/johnelse/nvml-experiments/tree/master/basic_dlopen
*/
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <nvml.h>
#include <pthread.h>
#include <unistd.h>
/*
 * Forward declaration of getPower(), defined after main().
 * Parameters: device index, device handle, pointer to nvmlDeviceGetPowerUsage,
 * pointer to nvmlErrorString.
 */
unsigned int getPower(int, nvmlDevice_t, nvmlReturn_t (*)(nvmlDevice_t, unsigned int*), char* (*)(nvmlReturn_t));
/**
* - Load the NVML library via dlopen.
* - Look up some symbols:
* - nvmlErrorString
* - nvmlInit
* - nvmlShutdown
* - nvmlSystemGetDriverVersion
* - Using these symbols:
* - Initialise the library.
* - Get the driver version.
* - Shut down the library.
* - Finally, unload the library with dlclose.
*/
int main(int argv, char** argc) {
void *nvml_handle;
int err;
nvmlReturn_t nvml_err;
char *error;
unsigned int i;
unsigned int device_count;
nvmlDevice_t device;
nvmlPciInfo_t pci_info;
char* (*_nvmlErrorString)(nvmlReturn_t);
nvmlReturn_t (*_nvmlInit)(void);
nvmlReturn_t (*_nvmlShutdown)(void);
nvmlReturn_t (*_nvmlSystemGetDriverVersion)(char*, unsigned int);
nvmlReturn_t (*_nvmlDeviceGetCount)(unsigned int*);
nvmlReturn_t (*_nvmlDeviceGetHandleByIndex)(unsigned int, nvmlDevice_t*);
nvmlReturn_t (*_nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t*);
nvmlReturn_t (*_nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int*);
char driver_version[NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE];
// Open a handle to the shared library.
nvml_handle = dlopen("libnvidia-ml.so", RTLD_LAZY);
if (!nvml_handle) {
fprintf(stderr, "Failed to load library: %s\n", dlerror());
exit(1);
}
printf("Library opened\n");
// Load some symbols.
_nvmlErrorString = dlsym(nvml_handle, "nvmlErrorString");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlErrorString\n");
_nvmlInit = dlsym(nvml_handle, "nvmlInit");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlInit\n");
_nvmlShutdown = dlsym(nvml_handle, "nvmlShutdown");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlShutdown\n");
_nvmlSystemGetDriverVersion =
dlsym(nvml_handle, "nvmlSystemGetDriverVersion");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlSystemGetDriverVersion\n");
_nvmlDeviceGetCount =
dlsym(nvml_handle, "nvmlDeviceGetCount");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlDeviceGetCount\n");
_nvmlDeviceGetHandleByIndex =
dlsym(nvml_handle, "nvmlDeviceGetHandleByIndex");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlDeviceGetHandleByIndex\n");
_nvmlDeviceGetPciInfo =
dlsym(nvml_handle, "nvmlDeviceGetPciInfo");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlDeviceGetPciInfo\n");
_nvmlDeviceGetPowerUsage =
dlsym(nvml_handle, "nvmlDeviceGetPowerUsage");
if((error = dlerror()) != NULL) {
fprintf(stderr, "%s\n", error);
exit(1);
}
printf("Found nvmlDeviceGetPowerUsage\n");
// Initialise the library.
nvml_err = _nvmlInit();
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "init failed: %s\n", _nvmlErrorString(nvml_err));
exit(1);
}
printf("Initialised NVML\n");
// Query the driver version.
nvml_err = _nvmlSystemGetDriverVersion(driver_version,
NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE);
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "couldn't get driver version: %s\n",
_nvmlErrorString(nvml_err));
exit(1);
}
printf("Got driver version: %s\n", driver_version);
// Query the number of installed devices.
nvml_err = _nvmlDeviceGetCount(&device_count);
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "couldn't get device count: %s\n",
_nvmlErrorString(nvml_err));
exit(1);
}
// Attempt to query each device's PCI info.
for (i = 0; i < device_count; i++) {
nvml_err = _nvmlDeviceGetHandleByIndex(i, &device);
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "couldn't get handle for device %d: %s\n",
i, _nvmlErrorString(nvml_err));
}
else {
nvml_err = _nvmlDeviceGetPciInfo(device, &pci_info);
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "couldn't get PCI info for device %d: %s\n",
i, _nvmlErrorString(nvml_err));
}
else {
unsigned int power = getPower(i, device, _nvmlDeviceGetPowerUsage, _nvmlErrorString);
printf("%s\n", "-------------------");
printf("PCI info for device %d:\n", i);
printf("busId = %s\n", pci_info.busId);
printf("pciDeviceId = %x\n", pci_info.pciDeviceId);
printf("pciSubSystemId = %x\n", pci_info.pciSubSystemId);
printf("Power usage = %d\n", power);
}
}
}
// Shutdown the library.
nvml_err = _nvmlShutdown();
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "shutdown failed: %s\n", _nvmlErrorString(nvml_err));
exit(1);
}
printf("Shutdown NVML\n");
// Close the handle.
err = dlclose(nvml_handle);
if (err) {
fprintf(stderr, "Failed to close library: %s\n", dlerror());
exit(1);
}
printf("Library closed\n");
printf("Everything seems to be working OK :-)\n");
return 0;
}
void *thread_start(void * param)
{
nvmlReturn_t (*func)(nvmlDevice_t, unsigned int*) = ((void**)param)[2];
char* (*err)(nvmlReturn_t) = ((void**)param)[3];
nvmlDevice_t* device = ((void**)param)[0];
unsigned int* power = ((void**)param)[1];
int* i = ((void**)param)[5];
nvmlReturn_t nvml_err = func(*device, power);
if (NVML_SUCCESS != nvml_err) {
fprintf(stderr, "couldn't get power info for device %d: %s\n",
*i, err(nvml_err));
}
return NULL;
}
/*
 * Query the power usage of `device` from a separate thread and return it.
 *
 *   i      device index, used only in error messages
 *   device NVML device handle
 *   func   pointer to nvmlDeviceGetPowerUsage
 *   err    pointer to nvmlErrorString
 *
 * Returns the value written by `func`, or 0 if the thread could not be
 * started or the query failed (thread_start reports the NVML error).
 */
unsigned int getPower(int i, nvmlDevice_t device, nvmlReturn_t (*func)(nvmlDevice_t, unsigned int*), char* (*err)(nvmlReturn_t))
{
    pthread_t thread;
    /* Initialised so that a failed query returns a defined value. */
    unsigned int power = 0;
    /* Argument pack read by thread_start(). It can live on this stack frame
     * because the thread is joined before the function returns. */
    void *params[5];
    int rc;

    params[0] = &device;
    params[1] = &power;
    params[2] = (void *)func;
    params[3] = (void *)err;
    params[4] = &i;

    rc = pthread_create(&thread, NULL, thread_start, params);
    if (rc != 0) {
        fprintf(stderr, "couldn't start power thread for device %d: error %d\n",
                i, rc);
        return 0;
    }

    /* Wait for the worker to finish. A fixed sleep could return while the
     * thread is still about to write to `power` and read `params`. */
    pthread_join(thread, NULL);
    return power;
}