Hello:
I’m trying to allocate memory for a device struct in C, but I’m missing something: I can’t allocate the individual members of the struct itself…
Here’s the code I’m using:
#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "BigInteger.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
size_t getMemorySize();
// Allocates a `memory` struct directly in device memory, then tries to
// allocate one of its members from the host.
int main() {
memory* n = NULL;
// After this call n holds a device address; the struct itself lives on the GPU.
cudaMalloc(&n, getMemorySize()); //this one is working
// Reports the status of the most recent CUDA runtime call.
printf("Error malloc: %s\n", cudaGetErrorString(cudaGetLastError()));
//here, I want to use, for example n->mzero
// &n->mzero is an address inside the device allocation, so the host-side
// cudaMalloc would have to store its result through a device pointer.
cudaMalloc(&((memory*)n)->mzero, sizeof(BigInteger)); //error, unable to write memory
return 0;
}
// Returns the byte count main() passes to cudaMalloc for the `memory` struct:
// room for 19 BigInteger values plus one BIT value.
// NOTE(review): `memory` as declared in BigInteger.h holds 20 void* members, so
// this value is not sizeof(memory) — confirm which size the caller needs.
size_t getMemorySize() {
return (sizeof(BigInteger) * 19) + sizeof(BIT);
}
//BigInteger.h
/*
 * Bundle of 20 untyped scratch pointers, grouped under the operation named in
 * each section comment. The struct stores only the pointers themselves, not
 * the buffers they refer to.
 */
typedef struct memory {
    /* add */
    void *vt;
    /* sub */
    void *stmp;
    /* mul */
    void *mpart, *mret, *mzero, *mone, *mtmp;
    /* dvs */
    void *done, *dtmp, *dret, *dTemp, *biTemp;
    /* nqrt */
    void *sret, *sraw, *sbase, *szero;
    /* bipow */
    void *bres, *btmp;
    /* append */
    void *aaux;
    /* BIT */
    void *biBIT;
} memory;
/*
 * Fixed-capacity big-integer record.
 *   k     - single char field (meaning not shown here; presumably a sign/kind
 *           flag — TODO confirm)
 *   count - int field (presumably the number of used entries of n — TODO confirm)
 *   n     - storage for 4096 signed bytes
 *
 * Declared before BIT because BIT embeds BigInteger by value and therefore
 * needs the complete type.
 */
typedef struct BigInteger {
    char k;
    int count;
    signed char n[4096];
} BigInteger;

/*
 * Table of 10 BigInteger values with one int status entry per slot.
 */
typedef struct BIT {
    struct BigInteger BI[10];
    int status[10];
} BIT;
So, I’m not able to use any part of `memory`, as I’m not able to do a proper malloc… I’m surely messing around with the pointers or something like that, but no matter what I try, nothing works…
Can someone help?
Thanks.
You allocate `n` in device memory. You cannot access struct members of `n` on the host if it is in device memory.
First create a simple struct on the host and use cudaMalloc for its members. Then use cudaMalloc to create the struct in device memory, and cudaMemcpy the host struct to the device struct.
Hi:
Looks like it gets a bit better, but still not totally OK…
#include "stdio.h"
#include "stdlib.h"
#include "conio.h"
#include "BigInteger.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Entry point: hands the address of n to _cinit, which is expected to leave
// the address of an initialised `memory` struct in it.
int main() {
memory* n = NULL; //device memory
_cinit((void**)&n); //init memory struct
// NOTE(review): if n ends up holding a device address (as the comment above
// says), evaluating n->mzero on the host dereferences device memory.
//n->mzero still has memory error!
return 0;
}
// Builds a host-side staging `memory` struct, gives each of its 20 members a
// device buffer with cudaMalloc, then passes the struct to h2d() (quantity 1,
// type 1) together with n, and finally frees the host staging struct.
// No CUDA return code is checked here, and the member buffers are not freed
// in this function.
void _cinit(void** n) {
// NOTE(review): the staging block is getMemorySize() bytes, not sizeof(memory).
memory* t = (memory*)malloc(getMemorySize()); //First create simple struct on the host.
// t is host memory, so &t->member is a host address cudaMalloc can write to.
cudaMalloc((void**)&((memory*)t)->vt, sizeof(BigInteger)); //use cudaMalloc for its members.
cudaMalloc((void**)&((memory*)t)->stmp, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->mpart, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->mret, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->mzero, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->mone, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->mtmp, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->done, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->dtmp, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->dret, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->dTemp, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->biTemp, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->sret, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->sraw, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->sbase, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->szero, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->bres, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->btmp, sizeof(BigInteger));
cudaMalloc(&((memory*)t)->aaux, sizeof(BigInteger));
// The last member is sized for a BIT rather than a BigInteger.
cudaMalloc(&((memory*)t)->biBIT, sizeof(BIT));
//copy 't' over 'n'
h2d(n, t, 1, 1);
// Only the host staging block is released; the device buffers stay allocated.
free(t);
}
// Allocates sz * quantity bytes on the device, stores the new address in
// *device, and copies that many bytes from host into it.
//   device   - receives the address of the new device allocation
//   host     - source buffer in host memory
//   quantity - element count; values <= 0 return without doing anything
//   type     - 0: element size is sizeof(BigInteger)
//              1: element size is getMemorySize()
//              anything else returns without doing anything
void h2d(void** device, void* host, int quantity, int type) {
size_t sz;
//TODO: add proper handling here
//Avoid copying illegal amounts of memory
if (quantity <= 0)
return;
if (type == 0)
sz = sizeof(BigInteger);
else if (type == 1)
sz = getMemorySize();
else
return;
cudaMalloc(device, sz * quantity); //Then use cudaMalloc to create the struct in device
// Prints the status left by the allocation above; execution continues either way.
printf("Error malloc: %s\n", cudaGetErrorString(cudaGetLastError()));
// The return code of this copy is not inspected.
cudaMemcpy(*device, host, sz * quantity, cudaMemcpyHostToDevice); //and cudaMemcpy the host struct to the device struct
}
// Returns the size of 19 BigInteger values plus one BIT value, in bytes.
// NOTE(review): this is the combined size of the pointed-to payloads, not
// sizeof(memory) (a struct of void* members) — confirm which one callers need.
size_t getMemorySize() {
return (sizeof(BigInteger) * 19) + sizeof(BIT);
}
Any suggestions?
Thanks.
I suggest using proper CUDA error checking. But it is also your host code which is not working correctly.
`getMemorySize()` does not return the correct size. This is simply a C programming mistake unrelated to CUDA.
It could look like this:
#include <assert.h>
// Stand-in for the real big-integer type; its fields are elided in this example.
struct BigInt{
//... something
};
/* Cut-down `memory` for the example: two untyped pointers and nothing else. */
typedef struct memory {
    void *vt;    /* add */
    void *stmp;  /* sub */
} memory;
/*
 * Round trip for a struct of device pointers:
 *   1. allocate each member's buffer on the device, keeping the addresses in
 *      a host-side struct,
 *   2. allocate the struct itself on the device and upload the host copy,
 *   3. tear down by downloading the struct again and freeing every pointer.
 * Every CUDA call's status is checked with assert().
 */
int main(){
    memory hostView;      /* host struct whose members hold device addresses */
    memory* devStruct;    /* the same struct, living in device memory */
    cudaError_t rc;

    /* 1. One device buffer per member; &hostView.x is a host address. */
    rc = cudaMalloc(&hostView.vt, sizeof(struct BigInt));
    assert(rc == cudaSuccess);

    rc = cudaMalloc(&hostView.stmp, sizeof(struct BigInt));
    assert(rc == cudaSuccess);

    /* 2. Device-side struct, then publish the filled-in pointers to it. */
    rc = cudaMalloc(&devStruct, sizeof(memory));
    assert(rc == cudaSuccess);

    rc = cudaMemcpy(devStruct, &hostView, sizeof(memory), cudaMemcpyHostToDevice);
    assert(rc == cudaSuccess);

    /*
     * 3. Teardown, written as if hostView were no longer available: fetch the
     *    pointer values back from the device before freeing them.
     */
    memory fetched;
    rc = cudaMemcpy(&fetched, devStruct, sizeof(memory), cudaMemcpyDeviceToHost);
    assert(rc == cudaSuccess);

    rc = cudaFree(fetched.vt);
    assert(rc == cudaSuccess);

    rc = cudaFree(fetched.stmp);
    assert(rc == cudaSuccess);

    rc = cudaFree(devStruct);
    assert(rc == cudaSuccess);
}
Hi, what do you mean it’s not getting the right size?
On the C version it works… there are 19 BigInteger variables on memory struct plus a BIT variable… so that’s what the function is returning…
Is there a mistake I’m making?
Your memory struct does not store 19 BigInteger variables + 1 BIT variable. It stores 20 void* pointers, i.e. sizeof(void*) * 20.
Finally… I have this code
/*
 * cudaMalloc wrapper that leaves *slot as NULL when the allocation fails, so
 * the caller can hand every slot to cudaFree unconditionally during cleanup.
 */
static cudaError_t cinit_alloc(void** slot, size_t bytes) {
    cudaError_t err = cudaMalloc(slot, bytes);
    if (err != cudaSuccess)
        *slot = NULL;
    return err;
}

/*
 * Creates a `memory` struct in device memory and returns its device address
 * through *n.
 *
 * Each of the 19 BigInteger slots and the single BIT slot gets its own device
 * buffer; the device addresses are gathered in a host-side staging struct,
 * which is then copied to the device. The vt buffer is seeded with the
 * BigInteger produced by BImemcpy(&w, 2).
 *
 * n: out-parameter for the device address of the struct. A NULL n is ignored.
 *
 * On any CUDA failure the error is reported on stderr, every buffer that was
 * already allocated is released, and *n is left as NULL.
 */
void _cinit(void** n) {
    memory t = {0};   /* host staging copy; all slots start out NULL */
    BigInteger w;
    cudaError_t err;
    size_t i;
    /* Every slot that holds one BigInteger, in declaration order. */
    void** const bigSlots[] = {
        &t.vt, &t.stmp,
        &t.mpart, &t.mret, &t.mzero, &t.mone, &t.mtmp,
        &t.done, &t.dtmp, &t.dret, &t.dTemp, &t.biTemp,
        &t.sret, &t.sraw, &t.sbase, &t.szero,
        &t.bres, &t.btmp,
        &t.aaux
    };
    const size_t bigCount = sizeof(bigSlots) / sizeof(bigSlots[0]);

    if (n == NULL)
        return;
    *n = NULL;

    /* The struct on the device stores only the 20 pointers. */
    err = cinit_alloc(n, sizeof(memory));

    BImemcpy(&w, 2); //moves the value "2" to w

    for (i = 0; err == cudaSuccess && i < bigCount; ++i)
        err = cinit_alloc(bigSlots[i], sizeof(BigInteger));
    if (err == cudaSuccess)
        err = cinit_alloc(&t.biBIT, sizeof(BIT));

    //adding some fixed values
    if (err == cudaSuccess)
        err = cudaMemcpy(t.vt, &w, sizeof(BigInteger), cudaMemcpyHostToDevice);

    /* Publish the collected device addresses to the device-side struct. */
    if (err == cudaSuccess)
        err = cudaMemcpy(*n, &t, sizeof(memory), cudaMemcpyHostToDevice);

    if (err != cudaSuccess) {
        fprintf(stderr, "_cinit failed: %s\n", cudaGetErrorString(err));
        /* Best-effort cleanup; cudaFree accepts NULL for never-allocated slots. */
        for (i = 0; i < bigCount; ++i)
            cudaFree(*bigSlots[i]);
        cudaFree(t.biBIT);
        cudaFree(*n);
        *n = NULL;
    }
}
Then it can be called as
/* Entry point: lets _cinit build the `memory` struct and report its address. */
int main(){
    memory* deviceMem = NULL;   /* filled in by _cinit */

    _cinit((void**)&deviceMem);
    return 0;
}
Now looks like it’s working…
Thanks!
system
Closed
March 22, 2022, 6:45pm
9
This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.