I fight with structs in cuda for couple days, simple float array passing works fine. The problem is with simple structs…
Take look at the snippets
Struct edge looks like this - for now (at the begining it was bigger, but problems made me to do a proxy)
struct edge {
int value;
};
#include <stdio.h>
#include "structs.h"
#include <cuda.h>
__global__ void findAllEdges(struct edge** ed) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
(*ed)[idx].value = 23; //some value, not important yet
}
void findEdges(struct elem** elems, struct edge** edges, struct param* params) {
int i;
int edgesAmount = 4*( params->amountOfElems ); //40 atm
(*edges) = (struct edge*) malloc (sizeof(struct edge)*edgesAmount);
for(i=0;i<edgesAmount;i++) {
(*edges)[i].value = 11; //value to differ from wanted 23
}
struct edge* d_edges;
size_t size = edgesAmount*sizeof(struct edge);
cudaMalloc((void **) &d_edges, size);
cudaMemcpy(d_edges, edges, size, cudaMemcpyHostToDevice);
int NT = params->amountOfElems*4;
int n_blocks = NT/5 + (NT%5 == 0 ? 0:1);
findAllEdges <<< n_blocks, 5 >>> (&d_edges);
int cudaStatus = cudaMemcpy(edges, d_edges, size, cudaMemcpyDeviceToHost);
if(cudaStatus == cudaErrorLaunchFailure)
printf("WTF\n");
for(i=0;i<edgesAmount;i++)
printf("edge[%d] val %d \n",i,(*edges)[i].value);
cudaFree(d_edges);
}
the last cudaMemCpy returns cudaErrorLaunchFailure … so it prints WTF…
when i print the array of edges it prints value 11 instead of 23 (due to failed MemCpy i guess).
What am i doing wrong, i am about to go insane due to that simple issue…