I’m trying to build a forward function for my NEAT algorithm but compiler is giving an illegal memory access was encountered
error. I tried to mem-check but there is nothing helpful
Output;
========= Invalid __global__ write of size 4 bytes
========= at EvaluateNetworks(float *, Network *, int, int, int)+0x7c0
========= by thread (96,0,0) in block (0,0,0)
========= Address 0x0 is out of bounds
========= and is 47.248.900.096 bytes before the nearest allocation at 0xb00410000 of size 65.536 bytes
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame: cuEventRecordWithFlags [0x7ff9aa88fbc6] in nvcuda64.dll
========= Host Frame: [0x16468] in a.exe
========= Host Frame: [0x16326] in a.exe
========= Host Frame: [0x16f61] in a.exe
========= Host Frame: [0x650b] in a.exe
========= Host Frame: [0x5829] in a.exe
========= Host Frame: [0x4c29] in a.exe
========= Host Frame: [0x5000] in a.exe
========= Host Frame: [0x2aa9c] in a.exe
========= Host Frame: BaseThreadInitThunk [0x7ff9e7d07374] in KERNEL32.DLL
========= Host Frame: RtlUserThreadStart [0x7ff9e8c3cc91] in ntdll.dll
=========
========= Program hit cudaErrorLaunchFailure (error 719) due to "unspecified launch failure" on CUDA API call to cudaDeviceSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame: cuProfilerStop [0x7ff9aa9cc345] in nvcuda64.dll
========= Host Frame: [0x16a9c] in a.exe
========= Host Frame: [0x5006] in a.exe
========= Host Frame: [0x2aa9c] in a.exe
========= Host Frame: BaseThreadInitThunk [0x7ff9e7d07374] in KERNEL32.DLL
========= Host Frame: RtlUserThreadStart [0x7ff9e8c3cc91] in ntdll.dll
=========
GPUassert: unspecified launch failure a.cu 305
Structs:
struct Connection {
int innovationid;
int from;
int to;
float weight;
//int type; // 0 input to hidden, 1 input to output, 2 hidden to hidden, 3 hidden to output
};
struct Neuron {
int type; // 0 input, 1 hidden, 2 output
float input_sum;
float bias;
float output = 0.0f;
int* incoming_connections;
int id;
int connected_num;
};
struct Network {
Connection* Connections;
Neuron* Neurons;
int output_ids[3];
int num_neurons;
int num_connections;
float fitness;
};
Functions:
__device__ void GetNetworkOutput(float* rollingdata, Network* net, int* output_ids, int* result_id) {
float* outputs = new float[net->num_neurons];
float* values = new float[net->num_neurons];
int* completed = new int[net->num_connections];
for (int i = 0; i < net->num_neurons; ++i) {
if (net->Neurons[i].type == 0) {
values[i] = rollingdata[i];
outputs[i] = rollingdata[i];
}
else {
values[i] = 0.0f;
outputs[i] = 0.0f;
}
}
bool finished = false;
for (int i = 0; i < net->num_connections; ++i) {
if (net->Neurons[net->Connections[i].from].type == 0) {
values[net->Connections[i].to] += net->Connections[i].weight * values[net->Connections[i].from];
completed[i] = 1;
}
else {
completed[i] = 0;
}
}
while (!finished) {
finished = true;
for (int i = 0; i < net->num_connections; ++i) {
if (completed[i] == 0) {
finished = false;
if (values[net->Connections[i].from] != 0.0f) {
bool ehezowski = true;
for (int j = 0; j < net->Neurons[net->Connections[i].from].connected_num; ++j) {
if (outputs[net->Neurons[net->Connections[i].from].incoming_connections[j]] == 0.0f) {
ehezowski = false;
}
}
if (ehezowski) {
if (outputs[net->Connections[i].from] == 0.0f) {
outputs[net->Connections[i].from] = activationFunction(values[net->Connections[i].from] + net->Neurons[net->Connections[i].from].bias);
}
values[net->Connections[i].to] += net->Connections[i].weight * values[net->Connections[i].from];
completed[i] = 1;
}
}
}
}
}
for (int i = 0; i < net->num_neurons; ++i) {
if (net->Neurons[i].type == 2) {
outputs[i] = activationFunction(values[i] + net->Neurons[i].bias);
}
}
float biggest = 0.0f;
for (int index = 0; index < 3; ++index) {
int val = output_ids[index];
biggest = (outputs[val] > biggest) ? outputs[val] : biggest;
*result_id = (outputs[val] > biggest) ? index : *result_id;
}
delete[] completed;
delete[] values;
delete[] outputs;
}
__global__ void EvaluateNetworks(float* __restrict__ rollingdata, Network* __restrict__ d_networks, int pop_num, int input_num, int output_num) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx >= pop_num) return;
float* first_390_data = new float[input_num];
for (int i = 0; i < input_num; ++i) {
first_390_data[i] = rollingdata[i];
}
Network* net = &d_networks[idx];
int result_id = -1;
if (net == nullptr || net->output_ids == nullptr || net->Connections == nullptr) {
printf("Error: Null pointer in GetNetworkOutput\n");
return;
}
GetNetworkOutput(first_390_data, net, net->output_ids, &result_id);
printf("%i\n", result_id);
}
It’s probably something like wrong pointer call but after hours of trying to solve the issue, couldn’t find the issue with the code