Trouble allocating device memory for a struct

doscar.sole · March 6, 2022, 12:36pm

Hello:
I’m trying to allocate memory for a device struct in C, but I’m missing something, as I can’t allocate the different parts of the struct itself…

Here’s the code I’m using:

#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "BigInteger.h"


#include "cuda_runtime.h"
#include "device_launch_parameters.h"

size_t getMemorySize();

int main() {
  memory* n = NULL;

  // Reserve the struct's storage on the device; from here on n holds a
  // device address.
  cudaMalloc(&n, getMemorySize()); //this one is working
  const char* mallocStatus = cudaGetErrorString(cudaGetLastError());
  printf("Error malloc: %s\n", mallocStatus);

  // Goal: give a member such as n->mzero its own device buffer.
  // The slot address below lies inside the device allocation, and cudaMalloc
  // stores the new pointer through it from host code -- this is the call
  // that fails (error, unable to write memory).
  void** mzeroSlot = &((memory*)n)->mzero;
  cudaMalloc(mzeroSlot, sizeof(BigInteger));

  return 0;
}

size_t getMemorySize() {
  return (sizeof(BigInteger) * 19) + sizeof(BIT);
}

//BigInteger.h
// Table of untyped buffer handles, grouped by the operation named next to
// each group. The allocation code in this file gives every member a
// BigInteger-sized buffer, except biBIT, which gets a BIT-sized one.
// Member order is significant: the struct is copied byte-for-byte.
typedef struct memory {
  void *vt;                                   // add
  void *stmp;                                 // sub
  void *mpart, *mret, *mzero, *mone, *mtmp;   // mul
  void *done, *dtmp, *dret, *dTemp, *biTemp;  // dvs
  void *sret, *sraw, *sbase, *szero;          // nqrt
  void *bres, *btmp;                          // bipow
  void *aaux;                                 // append
  void *biBIT;                                // BIT
} memory;


// Fixed-capacity big-integer record.
// Defined before BIT because BIT embeds an array of BigInteger by value,
// which requires the complete type.
typedef struct BigInteger {
  char k;
  int count;
  signed char n[4096];  // NOTE(review): presumably the digit storage -- confirm
} BigInteger;

// Ten BigInteger slots with one int status entry per slot.
typedef struct BIT {
  struct BigInteger BI[10];
  int status[10];
} BIT;

So, I’m not able to use any memory part as I’m not able to do a proper malloc… I’m surely messing around with the pointers or something like that, but as much as I can try nothing is working…

Can someone help?

Thanks.

striker159 · March 6, 2022, 1:15pm

You allocate n in device memory. You cannot access struct members of n on the host if it is in device memory.

First create a simple struct on the host. Use cudaMalloc for its members. Then use cudaMalloc to create the struct in device memory and cudaMemcpy the host struct to the device struct.

doscar.sole · March 6, 2022, 2:49pm

Hi:

Looks like it gets a bit better, but still not totally OK…

#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "BigInteger.h"


#include "cuda_runtime.h"
#include "device_launch_parameters.h"


int main() {
  // After _cinit returns, n is meant to hold the device-side memory struct.
  memory* n = NULL; //device memory
  void** handle = (void**)&n;

  _cinit(handle); //init memory struct

  //n->mzero still has memory error!
  //(n holds a device address, which host code cannot dereference)

  return 0;
}

void _cinit(void** n) {
  memory* t = (memory*)malloc(getMemorySize()); //First create simple struct on the host.

  cudaMalloc((void**)&((memory*)t)->vt, sizeof(BigInteger)); //use cudaMalloc for its members.
  cudaMalloc((void**)&((memory*)t)->stmp, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->mpart, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->mret, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->mzero, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->mone, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->mtmp, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->done, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->dtmp, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->dret, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->dTemp, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->biTemp, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->sret, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->sraw, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->sbase, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->szero, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->bres, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->btmp, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->aaux, sizeof(BigInteger));
  cudaMalloc(&((memory*)t)->biBIT, sizeof(BIT));

  //copy 't' over 'n'
  h2d(n, t, 1, 1);

  free(t);
}

// Allocates a device buffer for `quantity` elements and copies them over
// from `host`.
//   device:   receives the address of the new device buffer
//   host:     source data in host memory
//   quantity: element count; values <= 0 make the call a no-op
//   type:     0 -> element size is sizeof(BigInteger)
//             1 -> element size is getMemorySize()
//             any other value makes the call a no-op
void h2d(void** device, void* host, int quantity, int type) {
  size_t elementBytes;

  //ToDo: add proper handling here
  //Refuse to copy an illegal amount of memory
  if (quantity <= 0)
    return;

  switch (type) {
    case 0:
      elementBytes = sizeof(BigInteger);
      break;
    case 1:
      elementBytes = getMemorySize();
      break;
    default:
      return;
  }

  const size_t totalBytes = elementBytes * quantity;

  //Then use cudaMalloc to create the struct in device
  cudaMalloc(device, totalBytes);
  printf("Error malloc: %s\n", cudaGetErrorString(cudaGetLastError()));

  //and cudaMemcpy the host struct to the device struct
  cudaMemcpy(*device, host, totalBytes, cudaMemcpyHostToDevice);
}

size_t getMemorySize() {
  return (sizeof(BigInteger) * 19) + sizeof(BIT);
}

Any suggestions?

Thanks.

striker159 · March 6, 2022, 4:44pm

I suggest using proper CUDA error checking. But it is also your host code which is not working correctly.
getMemorySize() does not return the correct size. This is simply a C programming mistake unrelated to CUDA.

striker159 · March 6, 2022, 6:29pm

It could look like this:

#include <assert.h>

// Placeholder for the real big-integer type; this example only ever takes
// its size (sizeof(struct BigInt)) when allocating device buffers.
struct BigInt{
  //... something
};

// Cut-down version of the memory struct for this example: two untyped
// pointers that main() fills with device addresses obtained from cudaMalloc.
typedef struct memory {
  void *vt;    // add
  void *stmp;  // sub
} memory;

// Demonstrates the host-staging pattern for a device struct whose members are
// themselves device pointers:
//   1. cudaMalloc each member buffer into a struct that lives on the host,
//   2. cudaMalloc the struct itself on the device,
//   3. cudaMemcpy the host struct (now holding device addresses) to the device,
//   4. to free, copy the struct back to recover the member addresses.
//
// Every call is checked twice on purpose: assert() stops a debug build at the
// failing line, while the explicit test keeps the check alive in builds
// compiled with NDEBUG, where assert() expands to nothing.
//
// Returns 0 on success, 1 as soon as any CUDA call fails.
int main(){
  memory tmp;
  memory* d_memory;
  cudaError_t status;

  status = cudaMalloc(&tmp.vt, sizeof(struct BigInt));
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  status = cudaMalloc(&tmp.stmp, sizeof(struct BigInt));
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  status = cudaMalloc(&d_memory, sizeof(memory));
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  status = cudaMemcpy(d_memory, &tmp, sizeof(memory), cudaMemcpyHostToDevice);
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  //deallocate device memory, assuming tmp does not exist anymore:
  //fetch the struct back so the member addresses are readable on the host
  memory tmp2;
  status = cudaMemcpy(&tmp2, d_memory, sizeof(memory), cudaMemcpyDeviceToHost);
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  status = cudaFree(tmp2.vt);
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  status = cudaFree(tmp2.stmp);
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  status = cudaFree(d_memory);
  assert(status == cudaSuccess);
  if (status != cudaSuccess) return 1;

  return 0;
}

doscar.sole · March 6, 2022, 7:13pm

Hi, what do you mean it’s not getting the right size?
On the C version it works… there are 19 BigInteger variables on memory struct plus a BIT variable… so that’s what the function is returning…

Is there a mistake I’m making?

striker159 · March 6, 2022, 7:53pm

Your memory struct does not store 19 BigInteger variables + 1 BIT variable. It stores 20 void* pointers, i.e. sizeof(void*) * 20.

doscar.sole · March 8, 2022, 6:44pm

Finally… I have this code

void _cinit(void** n) {
  memory t;
  BigInteger w;

  cudaMalloc(n, sizeof(memory));
  BImemcpy(&w, 2); //moves the value "2" to w

  cudaMalloc(&t.vt, sizeof(BigInteger));
  cudaMalloc(&t.stmp, sizeof(BigInteger));
  cudaMalloc(&t.mpart, sizeof(BigInteger));
  cudaMalloc(&t.mret, sizeof(BigInteger));
  cudaMalloc(&t.mzero, sizeof(BigInteger));
  cudaMalloc(&t.mone, sizeof(BigInteger));
  cudaMalloc(&t.mtmp, sizeof(BigInteger));
  cudaMalloc(&t.done, sizeof(BigInteger));
  cudaMalloc(&t.dtmp, sizeof(BigInteger));
  cudaMalloc(&t.dret, sizeof(BigInteger));
  cudaMalloc(&t.dTemp, sizeof(BigInteger));
  cudaMalloc(&t.biTemp, sizeof(BigInteger));
  cudaMalloc(&t.sret, sizeof(BigInteger));
  cudaMalloc(&t.sraw, sizeof(BigInteger));
  cudaMalloc(&t.sbase, sizeof(BigInteger));
  cudaMalloc(&t.szero, sizeof(BigInteger));
  cudaMalloc(&t.bres, sizeof(BigInteger));
  cudaMalloc(&t.btmp, sizeof(BigInteger));
  cudaMalloc(&t.aaux, sizeof(BigInteger));
  cudaMalloc(&t.biBIT, sizeof(BIT));

  //adding some fixed values 
  cudaMemcpy(((BigInteger*)t.vt), &w, sizeof(BigInteger), cudaMemcpyHostToDevice);
  cudaMemcpy(*n, &t, sizeof(memory), cudaMemcpyHostToDevice);
}

Then it can be called as

int main(){
  // _cinit stores the device address of the memory struct here.
  memory* n = NULL;
  void** out = (void**)&n;

  _cinit(out);

  return 0;
}

Now it looks like it’s working…

Thanks!

system · March 22, 2022, 6:45pm

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
Allocating device memory for an struc inside an std::vector<struct> CUDA Programming and Performance	2	44	September 28, 2024
cudaHostAlloc can only allocate about 3.5GB of memory out of 128GB CUDA Programming and Performance	7	429	June 2, 2023
cudaMalloc error in big loop CUDA Programming and Performance	12	15594	May 21, 2008
Bad performance or bad coding? CUDA Programming and Performance	21	763	October 12, 2021
How to allocate memory for host in cuda main function? CUDA Programming and Performance	5	282	July 28, 2023
cudaMalloc3D and friends proper use for whatever data type CUDA Programming and Performance	6	5916	July 14, 2010
Struct allocated with cudaMallocManaged fails up from a certain size CUDA Programming and Performance	4	27	February 4, 2025
How get in host the memory allocated from device CUDA Programming and Performance	10	2944	August 16, 2017
[Beginner] Memory is reseted in the kernel CUDA Programming and Performance	5	1318	October 29, 2010
Allocating memory for 2D Array CUDA Programming and Performance	10	3590	September 4, 2009

Trouble allocating device memory for a struct

Related topics