Trouble allocating device memory for a struct

Hello:
I’m trying to allocate memory for a device struct in C, but I’m missing something, as I can’t allocate the individual members of the struct itself…

Here’s the code I’m using:

#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "BigInteger.h"


#include "cuda_runtime.h"
#include "device_launch_parameters.h"

size_t getMemorySize();

// Reproducer from the original question: allocates the `memory` struct on the
// device, then tries to allocate one of its members directly from host code.
int main() {
  memory* n = NULL;

  // After this call `n` holds a device address. The return code is ignored;
  // only the last recorded error is printed.
  cudaMalloc(&n, getMemorySize()); //this one is working
  printf("Error malloc: %s\n", cudaGetErrorString(cudaGetLastError()));

  //here, I want to use, for example n->mzero
  // NOTE(review): &n->mzero is an address inside the device allocation, so the
  // host-side cudaMalloc would have to store its result through a device
  // address -- this is the failing write reported on the next line.
  cudaMalloc(&((memory*)n)->mzero, sizeof(BigInteger)); //error, unable to write memory

  return 0;
}

// Returns the byte count used when allocating the `memory` block: room for
// nineteen BigInteger objects plus one BIT object.
// NOTE(review): this is the combined size of the pointed-to objects, not
// sizeof(memory) -- the struct itself only holds pointers.
size_t getMemorySize() {
  const size_t bigIntegerBytes = 19 * sizeof(BigInteger);
  const size_t bitBytes = sizeof(BIT);

  return bigIntegerBytes + bitBytes;
}

//BigInteger.h
// Table of untyped pointers, grouped by the routine named in each section
// comment (presumably scratch buffers for that routine -- confirm against the
// BigInteger implementation). The struct is twenty pointers wide; it does not
// contain the pointed-to objects themselves.
typedef struct memory {
  // add
  void *vt;

  // sub
  void *stmp;

  // mul
  void *mpart;
  void *mret;
  void *mzero;
  void *mone;
  void *mtmp;

  // dvs
  void *done;
  void *dtmp;
  void *dret;
  void *dTemp;
  void *biTemp;

  // nqrt
  void *sret;
  void *sraw;
  void *sbase;
  void *szero;

  // bipow
  void *bres;
  void *btmp;

  // append
  void *aaux;

  // BIT
  void *biBIT;
} memory;


// Big-integer record with a fixed 4096-element digit/byte buffer `n`.
// It is defined before BIT because BIT embeds BigInteger by value and
// therefore needs the complete type.
typedef struct BigInteger {
  char k;
  int count;
  signed char n[4096];  // terminating ';' was missing in the original
} BigInteger;


// Ten BigInteger slots with one int status entry per slot.
typedef struct BIT {
  struct BigInteger BI[10];
  int status[10];
} BIT;

So, I’m not able to use any member of the memory struct because I can’t get a proper malloc to work… I’m surely messing up the pointers or something like that, but no matter what I try, nothing works…

Can someone help?

Thanks.

You allocate n in device memory. You cannot access struct members of n on the host if it is in device memory.

First create a simple struct on the host and use cudaMalloc for its members. Then use cudaMalloc to create the struct in device memory, and cudaMemcpy the host struct to the device struct.

Hi:

Looks like it gets a bit better, but still not totally OK…

#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "BigInteger.h"


#include "cuda_runtime.h"
#include "device_launch_parameters.h"


// Entry point of the second attempt: lets _cinit build the device-side struct.
int main() {
  memory* deviceMemory = NULL; // receives a device address from _cinit

  _cinit((void**)&deviceMemory); // allocate the members and upload the struct

  // deviceMemory->mzero still has memory error!
  // NOTE(review): deviceMemory is a device address, so host code cannot
  // dereference it.

  return 0;
}

// Second attempt at initialisation: fills a host-side staging struct with
// device allocations for every member, then hands it to h2d, which allocates
// the device copy and uploads the staging struct into it.
//
// n: receives the device address produced by h2d.
void _cinit(void** n) {
  const size_t bigIntBytes = sizeof(BigInteger);

  // Host staging copy; it is sized with getMemorySize().
  memory* hostCopy = (memory*)malloc(getMemorySize());

  // One device allocation per member; the resulting device addresses are
  // stored in the host-side struct.
  cudaMalloc(&hostCopy->vt, bigIntBytes);
  cudaMalloc(&hostCopy->stmp, bigIntBytes);
  cudaMalloc(&hostCopy->mpart, bigIntBytes);
  cudaMalloc(&hostCopy->mret, bigIntBytes);
  cudaMalloc(&hostCopy->mzero, bigIntBytes);
  cudaMalloc(&hostCopy->mone, bigIntBytes);
  cudaMalloc(&hostCopy->mtmp, bigIntBytes);
  cudaMalloc(&hostCopy->done, bigIntBytes);
  cudaMalloc(&hostCopy->dtmp, bigIntBytes);
  cudaMalloc(&hostCopy->dret, bigIntBytes);
  cudaMalloc(&hostCopy->dTemp, bigIntBytes);
  cudaMalloc(&hostCopy->biTemp, bigIntBytes);
  cudaMalloc(&hostCopy->sret, bigIntBytes);
  cudaMalloc(&hostCopy->sraw, bigIntBytes);
  cudaMalloc(&hostCopy->sbase, bigIntBytes);
  cudaMalloc(&hostCopy->szero, bigIntBytes);
  cudaMalloc(&hostCopy->bres, bigIntBytes);
  cudaMalloc(&hostCopy->btmp, bigIntBytes);
  cudaMalloc(&hostCopy->aaux, bigIntBytes);
  cudaMalloc(&hostCopy->biBIT, sizeof(BIT));

  // Upload the staging struct (quantity 1, type 1).
  h2d(n, hostCopy, 1, 1);

  free(hostCopy);
}

// Allocates a device buffer and copies `host` into it.
//
// device:   receives the address of the new device allocation.
// host:     host buffer to upload.
// quantity: number of elements; non-positive values make the call a no-op.
// type:     0 selects sizeof(BigInteger) per element, 1 selects
//           getMemorySize() per element; any other value makes the call a
//           no-op.
void h2d(void** device, void* host, int quantity, int type) {
  size_t elementBytes;
  size_t totalBytes;

  // TODO: add proper handling here
  // Avoid copying illegal amounts of memory
  if (quantity <= 0)
    return;

  switch (type) {
    case 0:
      elementBytes = sizeof(BigInteger);
      break;
    case 1:
      elementBytes = getMemorySize();
      break;
    default:
      return;
  }

  totalBytes = elementBytes * quantity;

  // Create the destination buffer on the device and report the last CUDA error.
  cudaMalloc(device, totalBytes);
  printf("Error malloc: %s\n", cudaGetErrorString(cudaGetLastError()));

  // Upload the host data into the freshly allocated device buffer.
  cudaMemcpy(*device, host, totalBytes, cudaMemcpyHostToDevice);
}

// Returns the byte count of nineteen BigInteger objects plus one BIT object.
// NOTE(review): this is the size of the pointed-to objects combined, not
// sizeof(memory).
size_t getMemorySize() {
  const size_t bigIntegerBytes = 19 * sizeof(BigInteger);
  const size_t bitBytes = sizeof(BIT);

  return bigIntegerBytes + bitBytes;
}

Any suggestions?

Thanks.

I suggest using proper CUDA error checking. But it is also your host code which is not working correctly.
getMemorySize() does not return the correct size. This is simply a C programming mistake unrelated to CUDA.

It could look like this:

#include <assert.h>

// Placeholder for the real big-integer type; its fields are elided in this
// example and only its size is used below.
struct BigInt{
  //... something
};

// Abbreviated version of the `memory` struct for this example: two untyped
// pointers that will hold device addresses.
typedef struct memory {
  // add
  void *vt;

  // sub
  void *stmp;
} memory;

// Worked example: build a struct of device pointers on the host, upload it to
// the device, then tear everything down again.
//
// NOTE(review): the checks use assert, so they disappear when the program is
// compiled with NDEBUG; the CUDA calls themselves still run.
int main(){
  memory tmp;          // host-side staging copy of the struct
  memory* d_memory;    // device-side copy of the struct
  cudaError_t status;

  // Allocate each member on the device; the device addresses are stored in
  // the host-side struct.
  status = cudaMalloc(&tmp.vt, sizeof(struct BigInt));
  assert(status == cudaSuccess);

  status = cudaMalloc(&tmp.stmp, sizeof(struct BigInt));
  assert(status == cudaSuccess);

  // Allocate the struct itself on the device: sizeof(memory), i.e. just the
  // pointers, not the objects they point to.
  status = cudaMalloc(&d_memory, sizeof(memory));
  assert(status == cudaSuccess);

  // Upload the filled-in struct so that device code can follow its pointers.
  status = cudaMemcpy(d_memory, &tmp, sizeof(memory), cudaMemcpyHostToDevice);
  assert(status == cudaSuccess);

  // Deallocate device memory, assuming tmp does not exist anymore: fetch the
  // struct back to the host to recover the member addresses before freeing.
  memory tmp2;
  status = cudaMemcpy(&tmp2, d_memory, sizeof(memory), cudaMemcpyDeviceToHost);
  assert(status == cudaSuccess);

  status = cudaFree(tmp2.vt);
  assert(status == cudaSuccess);

  status = cudaFree(tmp2.stmp);
  assert(status == cudaSuccess);

  status = cudaFree(d_memory);
  assert(status == cudaSuccess);

  return 0;
}

Hi, what do you mean it’s not getting the right size?
In the C version it works… there are 19 BigInteger variables in the memory struct plus a BIT variable… so that’s what the function is returning…

Is there a mistake I’m making?

Your memory struct does not store 19 BigInteger variables + 1 BIT variable. It stores 20 void* pointers, i.e. sizeof(void*) * 20.

Finally… I have this code

// Builds a `memory` struct on the device and stores its device address in *n.
//
// Each member of a host-side staging struct receives its own device
// allocation (sizeof(BigInteger) bytes, or sizeof(BIT) for biBIT). The `vt`
// buffer is seeded with the BigInteger produced by BImemcpy(&w, 2), and the
// staging struct is then uploaded into the device copy.
//
// n: receives the device address of the struct. On any CUDA failure the error
//    is reported on stderr, everything allocated so far is released and *n is
//    left as NULL. A NULL `n` makes the call a no-op.
void _cinit(void** n) {
  memory t;
  BigInteger w;
  cudaError_t status;
  size_t i;

  // Every pointer member of t, in declaration order; biBIT is the only one
  // that is not BigInteger-sized.
  void** const slots[] = {
    &t.vt,
    &t.stmp,
    &t.mpart, &t.mret, &t.mzero, &t.mone, &t.mtmp,
    &t.done, &t.dtmp, &t.dret, &t.dTemp, &t.biTemp,
    &t.sret, &t.sraw, &t.sbase, &t.szero,
    &t.bres, &t.btmp,
    &t.aaux,
    &t.biBIT
  };
  const size_t slotCount = sizeof(slots) / sizeof(slots[0]);

  if (n == NULL)
    return;

  // Start from all-NULL so the cleanup path can free unconditionally
  // (cudaFree on a null pointer performs no operation).
  *n = NULL;
  for (i = 0; i < slotCount; ++i)
    *slots[i] = NULL;

  BImemcpy(&w, 2); //moves the value "2" to w

  status = cudaMalloc(n, sizeof(memory));
  if (status != cudaSuccess)
    *n = NULL;

  for (i = 0; status == cudaSuccess && i < slotCount; ++i) {
    const size_t bytes =
        (slots[i] == &t.biBIT) ? sizeof(BIT) : sizeof(BigInteger);

    status = cudaMalloc(slots[i], bytes);
    if (status != cudaSuccess)
      *slots[i] = NULL;
  }

  //adding some fixed values
  if (status == cudaSuccess)
    status = cudaMemcpy(t.vt, &w, sizeof(BigInteger), cudaMemcpyHostToDevice);

  // Upload the struct of device pointers itself.
  if (status == cudaSuccess)
    status = cudaMemcpy(*n, &t, sizeof(memory), cudaMemcpyHostToDevice);

  if (status != cudaSuccess) {
    fprintf(stderr, "_cinit failed: %s\n", cudaGetErrorString(status));

    for (i = 0; i < slotCount; ++i)
      cudaFree(*slots[i]);
    cudaFree(*n);
    *n = NULL;
  }
}

Then it can be called as

// Entry point: builds the device-side memory struct through _cinit.
int main(){
  memory* deviceMemory = NULL; // filled in by _cinit with a device address

  _cinit((void**)&deviceMemory);

  return 0;
}

Now it looks like it’s working…

Thanks!

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.