// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// includes, GL
#include <GL/glew.h>
//#include <GL/freeglut.h>
#include <GL/glut.h>
// includes CUDA <--> C++ interlops
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include <cutil_gl_inline.h>
#include <cutil_gl_error.h>
#include <cuda_gl_interop.h>
//define variables
// Data dimensions. Host and device buffers in this file are sized as
// multiples of XDIM*YDIM, and the input holds FRAMES such XDIM*YDIM blocks.
// (Presumably XDIM = lines per frame and YDIM = samples per line — TODO confirm.)
#define XDIM 1000 //original value 1000
#define YDIM 1024 //original value 1024
#define FRAMES 20
// Coefficients of the 4th-order polynomial that calc_resample_coefficients()
// evaluates to obtain the wavelength at each sample index:
//   lambda(p) = START_WAVELENGTH + WAVELENGTH_SPACING*p
//             + SECOND_ORDER_CORRECTION*p^2 + THIRD_ORDER_CORRECTION*p^3
//             + FOURTH_ORDER_CORRECTION*p^4,   with p = y + CAPTURE_LINE_OFFSET.
// (Wavelength is presumably in nanometres, given the 1e6/lambda -> mm^-1
// conversion used there — TODO confirm.)
#define START_WAVELENGTH 7.5226E+02
#define WAVELENGTH_SPACING 8.6969E-02
#define SECOND_ORDER_CORRECTION -4.2264E-06
#define THIRD_ORDER_CORRECTION 8.9511E-10
#define FOURTH_ORDER_CORRECTION -1.7987E-13
// Number of samples per line used by the resampling table (2 floats per sample).
#define LINE_LENGTH 1024 //Note same as YDIM, should be fixed
// Constant added to the sample index before evaluating the wavelength polynomial.
#define CAPTURE_LINE_OFFSET 800
// Not referenced anywhere in the visible part of this file.
#define RESAMPLE_POINTS 1024
// variables into cuda
// Raw bytes of the input file; filled by readData() and consumed by convertor().
// NOTE(review): sized XDIM*XDIM*YDIM bytes (about 1 GB with the current values),
// while readData() only reads 2*XDIM*YDIM*FRAMES bytes — confirm whether
// FRAMES was intended instead of the second XDIM.
unsigned char a[XDIM*XDIM*YDIM];
// resamp_1D:    resampling table written by calc_resample_coefficients(),
//               two floats per sample (index, wavenumber offset).
// original_cpu: float copy of the input data produced by convertor().
// dev_original: device copy of original_cpu (allocated and filled in main()).
float resamp_1D[LINE_LENGTH*2], original_cpu[FRAMES*XDIM*YDIM], *dev_original;
// Spacing of the resampled wavenumber grid; set by calc_resample_coefficients().
float k_resampledspacing;
// Device scratch buffer of XDIM*YDIM complex values (allocated in main()).
cuComplex *d_in;
// dev_k_resampledspacing: device copy of k_resampledspacing (one float).
// dc_subtracted:          device buffer of XDIM*YDIM floats.
float *dev_k_resampledspacing, *dc_subtracted;
// Single device ints, both initialised to 0 in main().
int *dev_frame, *dev_sum;
// dev_resamp: device copy of resamp_1D.
// dev_b:      not referenced anywhere in the visible part of this file.
float *dev_resamp, *dev_b;
// pbo variables
// OpenGL buffer object created by createVBO() and drawn by display().
GLuint pbo;
// CUDA graphics-resource handle for `pbo`, obtained when it is registered with CUDA.
struct cudaGraphicsResource *cuda_pbo_resource;
// Name of the input data file opened by readData().
char inputfile[] = "joecornearadialvolume.unp";
// Host-side entry point defined outside this file; display() calls it once per
// redraw with the device buffers above and the mapped buffer-object pointer.
extern "C"
void launch_kernel(int* sum, float* original_data, int* frame_num, cuComplex* d, float* k_resamp, float* dc_subtracted, float* resample, float4 *ptr);
// Read the raw data file named by `inputfile` into the global buffer `a`.
//
// Prints the file size to stdout, then reads XDIM*YDIM*FRAMES two-byte
// elements from the start of the file.
//
// Failure handling:
//  - If the file cannot be opened, the error is reported with perror() and
//    the program exits, because every later stage depends on this data.
//  - If fewer elements than expected are read, a warning is written to
//    stderr and the program continues with whatever was read.
void readData() {
    FILE *fp = fopen(inputfile, "rb");
    if (fp == NULL) {
        perror("Error opening data file");
        // fread()/fclose() on a NULL stream is undefined behaviour, so stop here.
        exit(EXIT_FAILURE);
    }

    // Seek to the end to learn the file size, then return to the beginning.
    fseek(fp, 0, SEEK_END);
    long size = ftell(fp);
    printf("Size of joecornearadialvolume.unp: %ld bytes.\n", size);
    rewind(fp);

    const size_t expected = (size_t)XDIM * YDIM * FRAMES;
    const size_t got = fread(a, 2, expected, fp);
    if (got != expected) {
        fprintf(stderr, "Warning: read only %lu of %lu two-byte elements from %s\n",
                (unsigned long)got, (unsigned long)expected, inputfile);
    }

    fclose(fp);
}
// Convert the raw samples held in `a` to float, one `original_cpu` entry per sample.
//
// readData() loads XDIM*YDIM*FRAMES two-byte elements into `a`, so every
// sample occupies two consecutive bytes of that buffer. Each pair of bytes is
// copied into an unsigned short (host byte order) and then widened to float.
// Going through memcpy on a byte pointer keeps this correct independent of the
// element type `a` is declared with.
//
// NOTE(review): assumes the file stores unsigned 16-bit samples in the host's
// byte order — confirm against the acquisition format.
void convertor() {
    const unsigned char *raw = (const unsigned char *)a;
    const size_t count = (size_t)XDIM * YDIM * FRAMES;
    for (size_t i = 0; i < count; ++i) {
        unsigned short sample = 0;
        memcpy(&sample, raw + 2 * i, 2);
        original_cpu[i] = (float)sample;
    }
}
// Build the resampling table `resamp_1D` and the grid spacing `k_resampledspacing`.
//
// Steps:
//  1. Evaluate the 4th-order wavelength polynomial at each of the LINE_LENGTH
//     sample positions (index shifted by CAPTURE_LINE_OFFSET) and convert each
//     wavelength to a wavenumber (1e6 / lambda).
//  2. Lay out LINE_LENGTH wavenumbers evenly spaced between the wavenumber of
//     the first sample and that of the last sample.
//  3. For each evenly spaced wavenumber y, take the first sample index i with
//         k_resampled[y+1] < k_sampled[i] <= k_resampled[y]
//     and store resamp_1D[2*y] = i and
//     resamp_1D[2*y+1] = k_resampled[y] - k_sampled[i].
//     Entries for which no sample matches are left untouched.
//
// Naming note: `kmin`/`kmax` are the wavenumbers at the first/last sample.
// With the coefficients defined in this file the wavelength grows with y, so
// `kmin` actually holds the larger wavenumber and the resampled grid descends.
void calc_resample_coefficients() {
    float lambda[LINE_LENGTH];
    float k_sampled[LINE_LENGTH];
    // One extra slot: step 3 compares against k_resampled[y+1], which for the
    // last y would otherwise read past the end of the array. The extra entry
    // continues the same evenly spaced grid by one step.
    float k_resampled[LINE_LENGTH + 1];
    float lambda0 = START_WAVELENGTH;
    float lambda1 = WAVELENGTH_SPACING;
    float lambda2 = SECOND_ORDER_CORRECTION;
    float lambda3 = THIRD_ORDER_CORRECTION;
    float lambda4 = FOURTH_ORDER_CORRECTION;
    float kmax;
    float kmin;

    // Step 1: wavelength and wavenumber at every sample position.
    for (int y = 0; y < LINE_LENGTH; y++) {
        lambda[y] = lambda0 + lambda1*(y+CAPTURE_LINE_OFFSET) +
                    lambda2*pow(float(y+CAPTURE_LINE_OFFSET), 2) +
                    lambda3*pow(float(y+CAPTURE_LINE_OFFSET), 3) +
                    lambda4*pow(float(y+CAPTURE_LINE_OFFSET), 4);
        k_sampled[y] = 1000000./lambda[y];
    }

    kmin = 1000000./lambda[0]; //wavenumber in mm^-1
    kmax = 1000000./lambda[LINE_LENGTH-1];
    k_resampledspacing = (kmin - kmax)/(LINE_LENGTH-1);

    // Step 2: evenly spaced grid, plus the one extrapolated entry at index LINE_LENGTH.
    for (int y = 0; y <= LINE_LENGTH; y++) {
        k_resampled[y] = kmin + (float(y)/(LINE_LENGTH - 1))*(kmax - kmin);
    }

    // Step 3: locate, for every grid point, the first sample inside its interval.
    for (int y = 0; y < LINE_LENGTH; y++) {
        for (int i = 0; i < LINE_LENGTH; i++) {
            if (k_resampled[y] >= k_sampled[i] && k_resampled[y+1] < k_sampled[i]) {
                resamp_1D[y*2+0] = float(i);
                resamp_1D[y*2 +1] = k_resampled[y] - k_sampled[i];
                break;
            }
        }
    }
}
// Create an OpenGL buffer object large enough for XDIM*YDIM elements of four
// floats each, then register it with CUDA.
//
//   pixel_buffer        - receives the name of the newly generated GL buffer.
//   cuda_pixel_resource - receives the CUDA graphics-resource handle.
//
// The buffer is registered with cudaGraphicsMapFlagsWriteDiscard; a failed
// registration is handled by cutilSafeCall.
void createVBO(GLuint* pixel_buffer, struct cudaGraphicsResource **cuda_pixel_resource)
{
    // Storage requirement: four floats per element.
    const unsigned int buffer_bytes = XDIM * YDIM * 4 * sizeof(float);

    // Generate the buffer and allocate its (uninitialised) storage.
    glGenBuffers(1, pixel_buffer);
    const GLuint buffer_name = *pixel_buffer;
    glBindBuffer(GL_ARRAY_BUFFER, buffer_name);
    glBufferData(GL_ARRAY_BUFFER, buffer_bytes, 0, GL_DYNAMIC_DRAW);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // Hand the buffer to CUDA.
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(cuda_pixel_resource,
                                               buffer_name,
                                               cudaGraphicsMapFlagsWriteDiscard));
}
void display() {
// Map buffer object for writing from CUDA
float4* pix_buff;
cudaGraphicsMapResources(1, &cuda_pbo_resource, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void**)&pix_buff, &num_bytes, cuda_pbo_resource);
// Execute kernel
// will execute by calling kernel.cu file
launch_kernel(dev_sum, dev_original, dev_frame, d_in, dev_k_resampledspacing, dc_subtracted, dev_resamp, pix_buff);
// Unmap buffer object
cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0);
//////////////////////////////////////////////////////////
// Under is the part I do not understand clear
//////////////////////////////////////////////////////////
// copy to Open_gl texture
// Render from buffer object
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glBindBuffer(GL_ARRAY_BUFFER, pbo);
glVertexPointer(4, GL_FLOAT, 0, 0);
glEnableClientState(GL_VERTEX_ARRAY);
glDrawArrays(GL_POINTS, 0, XDIM * YDIM);
glDisableClientState(GL_VERTEX_ARRAY);
// Swap buffers
glutSwapBuffers();
glutPostRedisplay();
}
void deleteVBO() {
cudaGraphicsUnregisterResource(cuda_pbo_resource);
glDeleteBuffers(1, &pbo);
}
int main() {
// Pre-Calculation
readData();
convertor(); //convert values into single float
calc_resample_coefficients();
//Cuda Memory Allocation
int frame = 0, sum =0;
cudaMalloc((void**)&dev_frame, sizeof(int));
cudaMalloc((void**)&dev_sum, sizeof(int));
cudaMalloc((void**)&dev_original, sizeof(float)*FRAMES*XDIM*YDIM);
cudaMalloc((void**)&d_in,sizeof(cuComplex)*YDIM*XDIM);
cudaMalloc((void**)&dev_resamp, sizeof(float)*LINE_LENGTH*2);
cudaMalloc((void**)&dev_k_resampledspacing, sizeof(float));
cudaMalloc((void**)&dc_subtracted, sizeof(float)*XDIM*YDIM);
cudaMemcpy(dev_k_resampledspacing, &k_resampledspacing, sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(dev_resamp, resamp_1D, sizeof(float)*LINE_LENGTH*2, cudaMemcpyHostToDevice);
cudaMemcpy(dev_original, original_cpu, sizeof(float)*FRAMES*XDIM*YDIM, cudaMemcpyHostToDevice);
cudaMemcpy(dev_frame, &frame, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(dev_sum, &sum, sizeof(int), cudaMemcpyHostToDevice);
// Explicitly set device
cudaGLSetGLDevice(0);
glutDisplayFunc(display);
// Create buffer object and register it with CUDA
createVBO(&pbo, &cuda_pbo_resource);
// Loop
glutMainLoop();
}
// This is the actual code. Will this help? Thank you.