Could anybody spot why the following wraper function creates a segmentation fault…?
float compute_e_bonds(MMFF94S_bond_calcs_t* bondsIn, int gradients,
Bond_LookUp_t lookUp_bonds, int numAtoms, float* gradient_pointers) {
float e_bonds = 0.0f;
cudaGetDeviceProperties(&prop, 0);
blocksPerGrid = 2 * prop.multiProcessorCount;
threadsPerBlock = (numAtoms + blocksPerGrid - 1) / blocksPerGrid;
float* e, *dev_e, *dev_gradPointers;
int *dev_lookUp_bonds, *dev_lengths, *dev_offsets;
MMFF94S_bond_calcs_t *dev_bonds;
//allocate MMFFBONDS
cudaMalloc(
(void**) &dev_bonds,
lookUp_bonds.bonds_struct_length
* sizeof(struct MMFF94S_bond_calcs));
cudaMemcpy(
dev_bonds,
bondsIn,
lookUp_bonds.bonds_struct_length
* sizeof(struct MMFF94S_bond_calcs), cudaMemcpyHostToDevice);
//allocate int* lookup_bonds
cudaMalloc((void**) &dev_lookUp_bonds,
lookUp_bonds.bond_pointer_length * sizeof(int));
cudaMemcpy(dev_lookUp_bonds, lookUp_bonds.bonds,
lookUp_bonds.bond_pointer_length * sizeof(int),
cudaMemcpyHostToDevice);
//allocate and copy int* lookup_bonds lengths
cudaMalloc((void**) &dev_lengths, numAtoms * sizeof(int));
cudaMemcpy(dev_lengths, lookUp_bonds.length, numAtoms * sizeof(int),
cudaMemcpyHostToDevice);
//allocate and copy int* lookup_bonds offsets
cudaMalloc((void**) &dev_offsets, numAtoms * sizeof(int));
cudaMemcpy(dev_offsets, lookUp_bonds.offset, numAtoms * sizeof(int),
cudaMemcpyHostToDevice);
//allocate and copy grad pointers
cudaMalloc((void**) &dev_gradPointers, 3 * numAtoms * sizeof(float));
cudaMemcpy(dev_gradPointers, gradient_pointers,
3 * numAtoms * sizeof(float), cudaMemcpyHostToDevice);
//alocate e
cudaMalloc((void**) &dev_e, numAtoms * sizeof(float));
printf("Good up to kernel : %d\n", sizeof(struct MMFF94S_bond_calcs));
compute_bonds_energy<<<blocksPerGrid, threadsPerBlock>>>(dev_bonds,dev_e, dev_lookUp_bonds,dev_offsets, dev_lengths, numAtoms, dev_gradPointers);
cudaFree(dev_bonds);
cudaFree(dev_gradPointers);
cudaFree(dev_lookUp_bonds);
cudaFree(dev_offsets);
cudaFree(dev_lengths);
cudaMemcpy(e, dev_e, numAtoms*sizeof(float),
cudaMemcpyDeviceToHost);
cudaFree(dev_e);
cudaThreadExit();
for ( int i = 0; i < numAtoms; i++) {
e_bonds += e[i];
printf("Atom : %d | e = %lf \n", i+1, e[i]);
}
free(e);
return e_bonds;
}
All the structs and pointers i pass into the class have been checked and they are fine. Why do i get that?
Also the kernel was working before i make a small change : return e* from the function and then reduce it to float e_bonds;
Thanks in advance,
thanasio