Hello.
I have these three structs:
[codebox]struct Atom{
/* One atom record; an Amino_Acid refers to an array of these through its Atom pointer. */
int serial ;
/* 5-byte name buffer (presumably a 4-character name plus terminator -- confirm). */
char atom_name[5] ;
/* Four floats per atom (presumably x, y, z plus one spare component -- confirm). */
float coord[4] ;
float occupancy ;
float temp_factor ;
float charge ;
} ;
/* One residue; a Structure refers to an array of these through its Residue pointer. */
struct Amino_Acid{
char res_name[4] ;
char chainID[2] ;
char res_seq_plus_iCode[6] ;
char olc[2] ;
int nc ;
/* Presumably the number of entries behind the Atom pointer -- confirm. */
int size ;
/* Nested pointer to this residue's atoms; this is what makes a plain byte copy of the struct insufficient. */
struct Atom *Atom ;
} ;
/* Top-level record: an identifier plus a pointer to its residues. */
struct Structure{
char ident[256] ;
/* Number of Amino_Acid entries behind Residue (used as the element count when sizing the allocation). */
int length ;
/* Nested pointer to the residue array. */
struct Amino_Acid *Residue ;
} ;[/codebox]
Each “Structure” has thousands of Amino_Acids (Residue pointer), and each “Amino_Acid” has thousands of Atoms (Atom pointer).
How can I copy a complete “Structure” to Global Memory?
I suppose I can’t do:
[codebox]
/* cudaMalloc stores the address of the new device allocation in the pointer variable passed to it. */
cudaMalloc((void **) &structure_d, sizeof(Structure));
/* NOTE(review): if structure_d refers to the device allocation made above, its Residue member
   lives in device memory, so the host cannot pass that member's address to cudaMalloc. This is
   the step that is expected not to work; the nested pointer has to be set in a host-side copy
   of the struct, which is then transferred with cudaMemcpy. */
cudaMalloc((void **) &structure_d.Residue, sizeof(Amino_Acid) * structure_h.length);
…
[/codebox]
and do the same for the Atoms of each Residue. I imagine that cudaMalloc can’t write the pointer in Global Memory, can it?
In that case I would have to keep a lot of host-side pointers and pass them all as kernel parameters, which is impossible.
Thank you!
You are correct in supposing your current approach won’t work. Something like this might:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
/*
 * gpuAssert( condition )
 *
 * Terminates the program when `condition` (normally the return value of a
 * CUDA runtime call) is non-zero, printing the numeric status together with
 * the file and line of the failing call.
 *
 * - `condition` is evaluated exactly once and its value cached, so a failing
 *   CUDA call is never executed a second time while building the message.
 * - The body is wrapped in do { ... } while( 0 ) so that `gpuAssert( x );`
 *   is a single statement and is safe inside un-braced if/else.
 * - <stdlib.h> is included here because the macro calls exit().
 */
#ifndef gpuAssert
#include <stdio.h>
#include <stdlib.h>
#define gpuAssert( condition )                                               \
    do {                                                                     \
        const int gpu_assert_status_ = (int)(condition);                     \
        if( gpu_assert_status_ != 0 ) {                                      \
            fprintf( stderr, "\n FAILURE %d in %s, line %d\n",               \
                     gpu_assert_status_, __FILE__, __LINE__ );               \
            exit( 1 );                                                       \
        }                                                                    \
    } while( 0 )
#endif
/* Number of Amino_Acid entries allocated for the single Structure built in main(). */
#define N_RESIDUE (100)
/* Number of Atom entries assigned to each residue; the example uses the same count for every residue. */
#define N_ATOMS (1000)
/*
 * One atom record. Plain data with no pointers, so an array of these can be
 * transferred to the device with a single flat copy.
 */
struct Atom {
    int   serial;
    char  atom_name[5];   /* 5-byte name buffer (presumably 4 characters + terminator -- confirm) */
    float coord[4];       /* four floats (presumably x, y, z plus one spare slot -- confirm) */
    float occupancy;
    float temp_factor;
    float charge;
};
/*
 * One residue. Holds a nested pointer to its atoms, so the pointer value
 * stored here must already be a device address before the residue array is
 * copied to the GPU.
 */
struct Amino_Acid {
    char res_name[4];
    char chainID[2];
    char res_seq_plus_iCode[6];
    char olc[2];
    int  nc;
    int  size;            /* number of atoms behind `Atom` (main() stores N_ATOMS here) */
    struct Atom *Atom;    /* first atom of this residue (main() points it into the device atom pool) */
};
/*
 * Top-level record: an identifier plus the residue array it owns.
 */
struct Structure {
    char ident[256];
    int  length;                  /* number of residues behind `Residue` (main() stores N_RESIDUE here) */
    struct Amino_Acid *Residue;   /* residue array (main() stores the device residue pointer here) */
};
int main()
{
struct Structure * top, * _top;
struct Amino_Acid * residue, * _residue;
struct Atom * atoms, * _atoms;
assert( !(( top = (struct Structure *)malloc(sizeof(struct Structure)) ) == NULL) );
assert( !(( residue = (struct Amino_Acid *)malloc(N_RESIDUE * sizeof(struct Amino_Acid)) ) == NULL) );
assert( !(( atoms = (struct Atom *)malloc(N_ATOMS * N_RESIDUE * sizeof(struct Atom)) ) == NULL) );
gpuAssert( cudaMalloc( (void**)&_top, sizeof(struct Structure) ) );
gpuAssert( cudaMalloc( (void**)&_residue, N_RESIDUE * sizeof(struct Amino_Acid) ) );
gpuAssert( cudaMalloc( (void**)&_atoms, N_ATOMS * N_RESIDUE * sizeof(struct Atom) ) );
for(int i = 0, offset = 0; i < N_RESIDUE; i++, offset += N_ATOMS) {
residue[i].size = N_ATOMS;
residue[i].Atom = _atoms + offset;
}
gpuAssert( cudaMemcpy( _atoms, atoms, N_ATOMS * N_RESIDUE * sizeof(struct Atom), cudaMemcpyHostToDevice) );
gpuAssert( cudaMemcpy( _residue, residue, N_RESIDUE * sizeof(struct Amino_Acid), cudaMemcpyHostToDevice) );
top->Residue = _residue;
top->length = N_RESIDUE;
gpuAssert( cudaMemcpy( _top, top, sizeof(struct Structure), cudaMemcpyHostToDevice) );
cudaFree(_atoms); cudaFree(_residue); cudaFree(_top);
free(atoms), free(residue), free(top);
return cudaThreadExit();
}
The idea here is to allocate a pool of memory to hold all the atoms, then iteratively assign atoms to residues in suitably sized chunks from the pool (I have assumed a constant number of atoms per residue, but changing that is trivial). Any additional initialization data should be written into the host-side copies of the three structures, which are then copied to the device. I think that should leave you with a complete, initialized structure in device global memory.
Thank you so much. Very interesting.
I think it could work.