Hi Everyone,
I have the following files, which are working perfectly, I have a question. I would like to get the data back in my C++ program, how do I get it? For example if you see in the program, I would like float * c_d back in my c++ program. Please pardon my naming convention, I started with helloWorld example then ended up with matrix addition.
Any help is appreciated.
Regards,
Arup
helloWorld.cuh
#pragma once
void device_greetings(float * a_d);
helloWorld.cu
__global__ void device_greetings_kernel(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N)
{
a[idx]=a[idx]+1.f;
}
}
void device_greetings(float * a_d)
{
float * b_d; //array to be assigned to devicie
float * c_d;
int N = 14;
int blocksize = 4;
int nBlocks = N/blocksize + (N%blocksize == 0?0:1);
printf("%d\n",nBlocks);
//allocate memory to the array in CUDA
cudaMalloc((void **) &b_d, sizeof(float)*N);
cudaMalloc((void **) &c_d, sizeof(float)*N);
cudaMemcpy(b_d,a_d, sizeof(float)*N,cudaMemcpyHostToDevice);
device_greetings_kernel <<< nBlocks, blocksize >>> (b_d, N);
cudaMemcpy(c_d,b_d, sizeof(float)*N,cudaMemcpyDeviceToHost);
cudaMalloc((void **) &a_d, sizeof(float)*N);
cudaMemcpy(a_d,b_d, sizeof(float)*N,cudaMemcpyDeviceToHost);
//traverse the array
for(int i=0;i<N;i++)
{
//increment c_d by 1
//c_d[i]=c_d[i]+1;
printf("%f\n",a_d[i]);
printf("%f\n", c_d[i]);
}
cudaFree(b_d);
cudaFree(c_d);
printf("Hello World from device.!!!\n");
}
helloWorld.cpp
#include <stdio.h>
#include <iostream>
#include "helloWorld.cuh"
using namespace std;
int main()
{
std::cout << "Hello World. !!!" << std::endl;
//create an array in host
float * a_h, * b_h; //array to be assigned in host
int N = 14;
int i;
a_h = new float[N]; //allocate array values in host
b_h = new float[N]; //allocate array values in host
for(i=0;i<N;i++)
{
a_h[i]=10.f+i;
b_h[i]=0.f;
std::cout << "a_h["<<i<<"]:" << a_h[i]<<std::endl;
//std::cout << "b_h["<<i<<"]:" << b_h[i]<<std::endl;
}
// launch a kernel with a single thread to greet from the device
device_greetings(a_h);
for(int j=0;j<N;j++)
{
std::cout<< "a_h["<<j<<"]: "<< a_h[j]<<std::endl;
}
free(a_h);
free(b_h);
return 0;
}
Hi Everyone,
I have the following files, which are working perfectly, I have a question. I would like to get the data back in my C++ program, how do I get it? For example if you see in the program, I would like float * c_d back in my c++ program. Please pardon my naming convention, I started with helloWorld example then ended up with matrix addition.
Any help is appreciated.
Regards,
Arup
helloWorld.cuh
#pragma once
void device_greetings(float * a_d);
helloWorld.cu
__global__ void device_greetings_kernel(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N)
{
a[idx]=a[idx]+1.f;
}
}
void device_greetings(float * a_d)
{
float * b_d; //array to be assigned to devicie
float * c_d;
int N = 14;
int blocksize = 4;
int nBlocks = N/blocksize + (N%blocksize == 0?0:1);
printf("%d\n",nBlocks);
//allocate memory to the array in CUDA
cudaMalloc((void **) &b_d, sizeof(float)*N);
cudaMalloc((void **) &c_d, sizeof(float)*N);
cudaMemcpy(b_d,a_d, sizeof(float)*N,cudaMemcpyHostToDevice);
device_greetings_kernel <<< nBlocks, blocksize >>> (b_d, N);
cudaMemcpy(c_d,b_d, sizeof(float)*N,cudaMemcpyDeviceToHost);
cudaMalloc((void **) &a_d, sizeof(float)*N);
cudaMemcpy(a_d,b_d, sizeof(float)*N,cudaMemcpyDeviceToHost);
//traverse the array
for(int i=0;i<N;i++)
{
//increment c_d by 1
//c_d[i]=c_d[i]+1;
printf("%f\n",a_d[i]);
printf("%f\n", c_d[i]);
}
cudaFree(b_d);
cudaFree(c_d);
printf("Hello World from device.!!!\n");
}
helloWorld.cpp
#include <stdio.h>
#include <iostream>
#include "helloWorld.cuh"
using namespace std;
int main()
{
std::cout << "Hello World. !!!" << std::endl;
//create an array in host
float * a_h, * b_h; //array to be assigned in host
int N = 14;
int i;
a_h = new float[N]; //allocate array values in host
b_h = new float[N]; //allocate array values in host
for(i=0;i<N;i++)
{
a_h[i]=10.f+i;
b_h[i]=0.f;
std::cout << "a_h["<<i<<"]:" << a_h[i]<<std::endl;
//std::cout << "b_h["<<i<<"]:" << b_h[i]<<std::endl;
}
// launch a kernel with a single thread to greet from the device
device_greetings(a_h);
for(int j=0;j<N;j++)
{
std::cout<< "a_h["<<j<<"]: "<< a_h[j]<<std::endl;
}
free(a_h);
free(b_h);
return 0;
}
float * c_d; – Seems to be device pointer:
…
cudaMalloc((void **) &c_d, sizeof(float)*N)
…
cudaMemcpy(c_d,b_d, sizeof(float)*N,cudaMemcpyDeviceToHost); – c_d would need to be a pointer to host side memory, so just pass it like you passed a_d…
float * c_d; – Seems to be device pointer:
…
cudaMalloc((void **) &c_d, sizeof(float)*N)
…
cudaMemcpy(c_d,b_d, sizeof(float)*N,cudaMemcpyDeviceToHost); – c_d would need to be a pointer to host side memory, so just pass it like you passed a_d…