Hello,
I am trying to combine my code with OpenGL in order to visualize in real time the configurations I generate with CUDA. I am using some data structure which I am trying ot integrate with the example I found the book “CUDA by Example”, chapter 8. I am able to generate one image, but I am not able to do a set of plots. I can only see the last configuration. Below is my code. For testing purposes I have a bunch of “particles” which just oscillate. At every iteration a density profile is created using the host function animate, with three calls (update particle position, create the field and copy to gpu ram, and map to pixels using a cuda kernel). I am also interested if it possible to combine it with some free programs ĺike blender or something similar.
(The include files are from the book https://developer.nvidia.com/cuda-example )
//rm a.out; nvcc -lGL -lGLU -lglut -O2 -lcufft -arch=sm_30 SPH.cu; optirun ./a.out
//rm a.out; nvcc -lGL -lGLU -lglut -O2 -lcufft -arch=sm_30 SPH.cu ; ./a.out
#include <cstdio>
#include <cmath>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "cpu_bitmap.h"
#include "cuda_gl_interop.h"
# define bps 4
# define tpbrho 512
# define Ngas 256
# define Nsed 16
#define DIM 512
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
GLuint bufferObj;
cudaGraphicsResource *resource;
typedef struct {
double *h;
double *d;
} doublevector;
typedef struct {
doublevector xgas; //x-component of the position of gas particles
doublevector ygas; //y-component of the position of gas particles
doublevector zgas; //z-component of the position of gas particles
doublevector rhogas; //z-component of the position of gas particles
} field;
typedef struct {
double omega;
double dx;
double dy;
double hgas;
double mgas;
double rhoavegas;
int lx;
int ly;
int total_size;
} modelparams;
__host__ void fname(char *conf,const int sss)
{
if(sss<10)
{
sprintf(conf,"psi0000%d.txt",sss);
}
else if(sss<100)
{
sprintf(conf,"psi000%d.txt",sss);
}
else if(sss<1000)
{
sprintf(conf,"psi00%d.txt",sss);
}
else if(sss<10000)
{
sprintf(conf,"psi0%d.txt",sss);
}
else if(sss<100000)
{
sprintf(conf,"psi%d.txt",sss);
}
}
__host__ void parameterinit(modelparams *parameters)
{
double pi=acos(-1.0);
parameters[0].dx=0.1;
parameters[0].dy=0.1;
parameters[0].hgas=1.0;
parameters[0].rhoavegas=15.0; //average vañue of the gas density
parameters[0].mgas=(parameters[0].rhoavegas*parameters[0].total_size*parameters[0].dx*parameters[0].dy)/(double)Ngas; // individual mass of a gas gas particle
}
__host__ void pointerinit(field *gassed,modelparams *parameters)
{
parameterinit(parameters);
gassed[0].xgas.h=(new double[Ngas]);
gassed[0].ygas.h=(new double[Ngas]);
gassed[0].zgas.h=(new double[Ngas]);
cudaMalloc((void**)&gassed[0].xgas.d, sizeof(double)*(Ngas));
cudaMalloc((void**)&gassed[0].ygas.d, sizeof(double)*(Ngas));
cudaMalloc((void**)&gassed[0].zgas.d, sizeof(double)*(Ngas));
gassed[0].rhogas.h=(new double[parameters[0].total_size]);
cudaMalloc((void**)&gassed[0].rhogas.d, sizeof(double)*(parameters[0].total_size));
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
// clean up OpenGL and CUDA
( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
}
}
static void draw_func( void ) {
// we pass zero as the last parameter, because out bufferObj is now
// the source, and the field switches from being a pointer to a
// bitmap to now mean an offset into a bitmap object
glDrawPixels( DIM,DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
__device__ unsigned char value( float n1, float n2, int hue ) {
if (hue > 360) hue -= 360;
else if (hue < 0) hue += 360;
if (hue < 60)
return (unsigned char)(255 * (n1 + (n2-n1)*hue/60));
if (hue < 180)
return (unsigned char)(255 * n2);
if (hue < 240)
return (unsigned char)(255 * (n1 + (n2-n1)*(240-hue)/60));
return (unsigned char)(255 * n1);
}
__global__ void kernel( uchar4 *optr, double *rho, double nnn,double mmm ) {
// map from threadIdx/BlockIdx to pixel position
int ibl=blockIdx.y+blockIdx.x*gridDim.y;
int offset=threadIdx.x+blockDim.x*ibl;
// int y=offset/gridDim.x;
// int x=offset-y*gridDim.x;
if(offset<DIM*DIM){
float l = 1-(rho[offset]-nnn)/(mmm-nnn);
float s = 1;
int h = (180 + (int)(360.0f * rho[offset])) % 360;
float m1, m2;
if (l <= 0.5f)
m2 = l * (1 + s);
else
m2 = l + s - l * s;
m1 = 2 * l - m2;
optr[offset].x = value( m1, m2, h+120 );
optr[offset].y = value( m1, m2, h );
optr[offset].z = value( m1, m2, h -120 );
optr[offset].w = 255;
}
}
__host__ void getrho(field gassed,modelparams parameters)
{
double pi=acos(-1.0);
int lx=parameters.lx;
int ly=parameters.ly;
int total_size=parameters.total_size;
double dx=parameters.dx;
double dy=parameters.dy;
double h=parameters.hgas;
int ihx=floor(h/dx)+1,ihy=floor(h/dy)+1;
for(int crho=0;crho<lx*ly;crho++){
gassed.rhogas.h[crho]=0.0;
}
double x,y;
for(int igas=0;igas<Ngas;igas++){
x=gassed.xgas.h[igas];
y=gassed.ygas.h[igas];
int ii=x/dx;
int jj=y/dy;
// printf("\n %d %f %f",igas, x,y);
for (int xi=ii-ihx;xi<=ii+ihx;xi++){
int ix=xi;
ix=(ix<0?ix+lx:ix); // periodic boudnary conditions
ix=(ix>=lx?ix-lx:ix);
for(int yj=jj-ihy;yj<=jj+ihy;yj++){
int iy=yj;
iy=(iy<0?iy+ly:iy);
iy=(iy>=ly?iy-ly:iy);
double rr=sqrt(pow((x-(xi+0.5)*dx),2)+pow(y-((yj+0.5)*dy),2));
double wrh=0;
if(rr/h<=0.5){
wrh=8.0/(pi*pow(h,3))*(1-6.0*pow((rr/h),2)+6.0*pow((rr/h),3));
}
if(rr/h>0.5 && rr/h<=1.0){
wrh=(8.0/(pi*pow(h,3)))*2.0*pow((1-rr/h),3);
}
int crho=iy+ix*ly;
//gassed.rhogas.h[crho]=1.0;
gassed.rhogas.h[crho]=gassed.rhogas.h[crho]+wrh;
//if(x>
//printf("\n %d %d %d %d %d %f",igas, ii,jj,xi,yj,wrh);
}}}
double maxrho=0.0;
double minrho=100.0;
for(int crho=0;crho<lx*ly;crho++){
if(gassed.rhogas.h[crho]>maxrho) maxrho=gassed.rhogas.h[crho];
if(gassed.rhogas.h[crho]<minrho) minrho=gassed.rhogas.h[crho];
}
printf("\n MAXrho= %lf MINrho= %lf \n",maxrho,minrho);
cudaMemcpy(gassed.rhogas.d,gassed.rhogas.h, sizeof(double)*(total_size),cudaMemcpyHostToDevice) ;
// printf("\n The gas dummy local density was initialized. The data was copied from host to device \n");
}
__host__ void sphoscillatory(field *gassed,modelparams *parameters,int ticks)
{
double pi=acos(-1.0);
double omega=parameters[0].omega;
int lx=parameters[0].lx;
int ly=parameters[0].ly;
// int total_size=parameters[0].total_size;
double dx=parameters[0].dx;
double dy=parameters[0].dy;
double h=parameters[0].hgas;
int ihx=floor(h/dx)+1,ihy=floor(h/dy)+1;
int igas=0;
for(int ii=0; ii<16;ii++){
for(int jj=0;jj<16;jj++){
double x=ii*((lx*dx)/16.0)+0.3*h*cos((1+igas)*ticks*2.0*pi/omega);
double y=jj*((ly*dy)/16.0)+0.3*h*sin((1+igas)*ticks*2.0*pi/omega);
gassed[0].xgas.h[igas]=x;
gassed[0].ygas.h[igas]=y;
igas++;
}}
// printf("\n igas= %d Ngas= %d ihx= %d ihy= %d\n", igas, Ngas,ihx,ihy);
cudaMemcpy(gassed[0].xgas.d,gassed[0].xgas.h, sizeof(double)*(Ngas),cudaMemcpyHostToDevice) ;
printf("\n The gas sph particles were initialized. The data was copied from host to device \n");
}
__host__ void animate(uchar4* devPtr,field *gassed,modelparams *parameters,int t)
{
double pi=acos(-1.0);
int lx=parameters[0].lx;
int ly=parameters[0].ly;
sphoscillatory(gassed,parameters,t);
getrho(gassed[0],parameters[0]);
kernel<<<(lx*ly+512-1)/512,512>>>( devPtr, gassed[0].rhogas.d,0.0,2.475487);
}
int main( int argc, char **argv )
{
cudaSetDevice(0);
size_t free, total;
printf("\n");
cudaMemGetInfo(&free,&total);
printf("%lu KB free of total %lu KB at the beginning\n",(free/1024),(total/1024));
const int lx=DIM,ly=lx,total_size=lx*ly;
dim3 gridrho,threadsrho;
// FILE * pFile;
// char conf[13];
threadsrho.x=tpbrho;
threadsrho.y=1;
threadsrho.z=1;
gridrho.x=(int)ceil((double)sqrt(total_size)/sqrt((double)threadsrho.x));
gridrho.y=(int)ceil((double)(total_size)/((double)threadsrho.x*(double)gridrho.x))+1;
gridrho.z=1;
printf("\n no of blocks %d, no of threades per block %d,\n total no of threads %d, total size=lx*ly %d\n", gridrho.x*gridrho.y,threadsrho.x,gridrho.x*gridrho.y*threadsrho.x,total_size);
field gassed[1];
modelparams parameters[1];
parameters[0].omega=53;
parameters[0].lx=lx;
parameters[0].ly=ly;
parameters[0].total_size=total_size;
pointerinit(gassed,parameters);
cudaMemGetInfo(&free,&total);
printf("\n%lu KB free of total %lu KB after all memory allocations\n",(free/1024),(total/1024));
cudaGLSetGLDevice(0);
// these GLUT calls need to be made before the other GL calls
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "Test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
// the first three are standard OpenGL, the 4th is the CUDA reg
// of the bitmap these calls exist starting in OpenGL 1.5
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone ) ;
// do work with the memory dst being on the GPU, gotten via mapping
( cudaGraphicsMapResources( 1, &resource, NULL ) );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr,&size, resource);
for(int t=0;t<1000; t++){
animate(devPtr,gassed,parameters,t);
// set up GLUT and kick off main loop
cudaGraphicsUnmapResources( 1, &resource, NULL );
}
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}