Hi all
After a week of frustration I have the following question.
I am building a static library from my CUDA raytracer, which I then want to link into my programs. Linking against the static library seems to work, but my printf() statements no longer print anything.
My library is built from several files:
GPURaytracer.h
#ifndef __GPURAYTRACER_H
#define __GPURAYTRACER_H
#include "raytracer_kernel.h"
#include <vector>
#include "Dose/Photon/cube/cube.h"
#include "Dose/Photon/cube/cubeinfo.h"
#include "Dose/Photon/cube/cubecalc.h"
#include "CUDA_SDK/cutil.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include "constants.h"
#include <iostream>
#include <cmath>
// Host-side front end of the ray tracer. It stores the launch configuration
// (block sizes, offsets, volume dimensions) together with the two pointers
// that DoTrace() hands to gpu_raytracer().
// VType is the element type of the CT dataset pointer.
template <class VType>
class GPURaytracer {
public:
// Constructor. The meaning of the individual arguments is not visible in this
// listing (the definition's body is elided).
// NOTE(review): presumably x1/y1/z1 are the ray source position, *_plane the
// voxel boundary planes, dim* the voxel counts and d* the voxel sizes — confirm.
GPURaytracer(const float x1, const float y1, const float z1,
const std::vector<float> & x_plane, const std::vector<float> & y_plane, const std::vector<float> & z_plane,
const short int dimx, const short int dimy, const short int dimz,
const float dx, const float dy, const float dz,
const VType *sData);
// Only prints a log line.
~GPURaytracer();
// Launches the ray-tracing kernel (via gpu_raytracer) over the volume.
void DoTrace();
// Copies memSize_ bytes from d_radiologicalDepth_ into the given host buffer.
void RadiologicalDepth(float *radiologicalDepth_);
// Sets offset_x_ / offset_y_.
void offSet(int offX, int offY);
// Sets blockSize_x_ / blockSize_y_.
void blockSize(int blockX, int blockY);
private:
// Size in bytes of the radiological-depth array; used as the byte count of
// the device-to-host copy in RadiologicalDepth().
unsigned int memSize_;
// Device array holding the radiological depth (source of the
// cudaMemcpyDeviceToHost copy in RadiologicalDepth()).
float* d_radiologicalDepth_;
// The CT dataset; passed to gpu_raytracer() as its rho argument.
VType *d_CT_dataset_;
// NOTE(review): the original comment said "grid dimension in the y-direction (?)"
// although the name says X; the member is not referenced anywhere in the
// visible code — confirm its meaning or remove it.
int gridX_;
// Volume dimensions used by DoTrace() to size the launch grid.
short int dimx_, dimy_, dimz_;
// Thread-block size and offsets forwarded to gpu_raytracer() by DoTrace().
unsigned int blockSize_x_, blockSize_y_, offset_x_, offset_y_;
};
// Constructor definition. Its body is elided in this listing: the placeholder
// line below stands for the author's cudaMemcpyToSymbol calls, so nothing
// beyond the signature can be documented from here.
// NOTE(review): DoTrace() reads dimx_/dimy_/dimz_, blockSize_x_/blockSize_y_ and
// offset_x_/offset_y_, and RadiologicalDepth() reads memSize_ and
// d_radiologicalDepth_ — confirm the elided body initialises all of them.
template <class VType>
GPURaytracer<VType>::GPURaytracer(const float x1, const float y1, const float z1,
const std::vector<float> & x_plane, const std::vector<float> & y_plane, const std::vector<float> & z_plane,
const short int dimx, const short int dimy, const short int dimz,
const float dx, const float dy, const float dz,
const VType *sData)
{
some cudaMemcpyToSymbols...
}
// Destructor: writes a single log line to standard output and does nothing else.
// NOTE(review): d_radiologicalDepth_ and d_CT_dataset_ are not released here;
// confirm where the device buffers are freed.
template <class VType>
GPURaytracer<VType>::~GPURaytracer()
{
    std::ostream& log = std::cout;
    log << "Destructor called.";
    log << std::endl;
}
// Copies the radiological-depth result from the device into a caller-supplied
// host buffer.
//
// h_radiologicalDepth: destination host buffer; it must be able to hold
//                      memSize_ bytes, which is the amount copied.
template <class VType>
void GPURaytracer<VType>::RadiologicalDepth(float* h_radiologicalDepth)
{
    std::cout << "Returning radiological depth..." << std::endl;

    float* const hostDst = h_radiologicalDepth;
    const float* const deviceSrc = d_radiologicalDepth_;
    const unsigned int byteCount = memSize_;

    // Blocking device-to-host copy, wrapped in the SDK's error-check macro.
    CUDA_SAFE_CALL(cudaMemcpy(hostDst, deviceSrc, byteCount, cudaMemcpyDeviceToHost));
}
// Stores the x/y offsets that DoTrace() later forwards to gpu_raytracer().
//
// offX, offY: new values for offset_x_ and offset_y_ (converted from int to
//             unsigned int on assignment).
template <class VType>
void GPURaytracer<VType>::offSet(int offX, int offY)
{
    this->offset_y_ = offY;
    this->offset_x_ = offX;
}
// Stores the thread-block dimensions that DoTrace() uses for its launches.
//
// blockX, blockY: new values for blockSize_x_ and blockSize_y_ (converted
//                 from int to unsigned int on assignment).
template <class VType>
void GPURaytracer<VType>::blockSize(int blockX, int blockY)
{
    this->blockSize_y_ = blockY;
    this->blockSize_x_ = blockX;
}
// Runs the ray tracer over the whole dimx_ x dimy_ plane (for all dimz_ slices)
// by issuing up to four gpu_raytracer() launches:
//   1. the interior, covered by a x b full-sized blocks,
//   2. the right-hand strip left over when dimx_ is not a multiple of the
//      block width,
//   3. the corner where both remainders meet,
//   4. the bottom strip left over when dimy_ is not a multiple of the block
//      height.
//
// Fixes relative to the previous version:
//   * The strip offsets were computed AFTER blockSize_x_/blockSize_y_ had been
//     overwritten with the remainder, i.e. as remainder * count instead of
//     fullBlockSize * count. They are now derived from the full block size.
//   * blockSize_y_, offset_x_ and offset_y_ were left modified after the call,
//     so a second DoTrace() ran with a corrupted configuration. All per-launch
//     values are now locals; the members are only read.
//   * A zero block size or a non-positive dimension caused a division by zero
//     (or a meaningless grid); this is now rejected up front.
//
// TODO (from the original author): support volumes smaller than one block in
// x or y (generic dimensions that are not a multiple of 16); in that case
// nothing is traced.
template <class VType>
void GPURaytracer<VType>::DoTrace()
{
    std::cout << "Raytracing..." << std::endl;

    // Snapshot the configuration so that no member is modified below.
    const unsigned int fullBlockX = blockSize_x_;
    const unsigned int fullBlockY = blockSize_y_;
    const unsigned int baseOffsetX = offset_x_;
    const unsigned int baseOffsetY = offset_y_;

    if (fullBlockX == 0 || fullBlockY == 0 || dimx_ <= 0 || dimy_ <= 0 || dimz_ <= 0) {
        std::cout << "DoTrace: invalid block size or volume dimensions, nothing traced." << std::endl;
        return;
    }

    const unsigned int sizeX = static_cast<unsigned int>(dimx_);
    const unsigned int sizeY = static_cast<unsigned int>(dimy_);
    const unsigned int sizeZ = static_cast<unsigned int>(dimz_);

    // Number of full blocks that fit in each direction (integer division
    // already rounds down).
    const int a = static_cast<int>(sizeX / fullBlockX);
    const int b = static_cast<int>(sizeY / fullBlockY);
    std::cout << "a: " << a << "\t" << "b: " << b << std::endl;

    if (a <= 0 || b <= 0) {
        return;
    }

    printf("line 133\n");

    // Size of the partial blocks at the right and bottom edges, and where
    // they start.
    const unsigned int tailBlockX = sizeX - fullBlockX * a;
    const unsigned int tailBlockY = sizeY - fullBlockY * b;
    const unsigned int tailOffsetX = fullBlockX * a;
    const unsigned int tailOffsetY = fullBlockY * b;

    // 1. Interior: a x b full blocks per slice, using the configured offsets.
    {
        dim3 dimBlock(fullBlockX, fullBlockY);
        dim3 dimGrid(a * sizeZ, b);
        gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_,
                      a, fullBlockX, fullBlockY,
                      baseOffsetX, baseOffsetY, dimBlock, dimGrid);
    }

    // 2. Right-hand strip: one narrow block per row of blocks.
    if (tailBlockX > 0) {
        dim3 dimBlock(tailBlockX, fullBlockY);
        dim3 dimGrid(sizeZ, b);
        gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_,
                      1, tailBlockX, fullBlockY,
                      tailOffsetX, baseOffsetY, dimBlock, dimGrid);
    }

    // 3. Corner: only exists when both directions have a remainder.
    if (tailBlockX > 0 && tailBlockY > 0) {
        dim3 dimBlock(tailBlockX, tailBlockY);
        dim3 dimGrid(sizeZ, 1);
        gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_,
                      1, tailBlockX, tailBlockY,
                      tailOffsetX, tailOffsetY, dimBlock, dimGrid);
    }

    // 4. Bottom strip: a short blocks per slice. As in the original code the
    //    x offset is 0 here rather than the configured offset_x_.
    if (tailBlockY > 0) {
        dim3 dimBlock(fullBlockX, tailBlockY);
        dim3 dimGrid(a * sizeZ, 1);
        gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_,
                      a, fullBlockX, tailBlockY,
                      0u, tailOffsetY, dimBlock, dimGrid);
    }
}
#endif
This header includes raytracer_kernel.h:
#ifndef __RAYTRACER_KERNEL__
#define __RAYTRACER_KERNEL__
//#ifdef __cplusplus
#include "cuda.h"
#include "cuda_runtime.h"
// Host-callable wrapper, defined in raytracer_kernel.cu, that launches the
// raytracer_kernel CUDA kernel with the execution configuration
// <<<dimGrid, dimBlock>>> and forwards all remaining arguments to the kernel
// unchanged.
// NOTE(review): radiologicalPath and rho are presumably device pointers
// (output depth array and CT densities) — confirm against the kernel body.
void gpu_raytracer(float *radiologicalPath, short int *rho,
int gridX, unsigned int blockSize_x, unsigned int blockSize_y,
unsigned int offset_x, unsigned int offset_y, dim3 dimBlock, dim3 dimGrid);
#endif
raytracer_kernel.cu
//calculate for all voxels the delta x, y and z
#include <stdio.h>
#include <cstdio>
#include <cassert>
#include "constants.h"
#include "raytracer_kernel.h"
#include "compute_RD_shell.cu"
// CUDA kernel launched by gpu_raytracer() with <<<dimGrid, dimBlock>>>.
// The body is elided in this listing (the two placeholder lines below are the
// author's description, not code), so the indexing scheme and the use of the
// individual parameters cannot be documented from here.
__global__ void raytracer_kernel(float *radiologicalPath, short int *rho,
int gridX, unsigned int blockSize_x, unsigned int blockSize_y,
unsigned int offset_x, unsigned int offset_y) {
Some fancy stuff happens here...
and also the printf()s
}
// Host wrapper that launches raytracer_kernel.
//
// radiologicalPath, rho, gridX, blockSize_x, blockSize_y, offset_x, offset_y
//     are forwarded to the kernel unchanged.
// dimBlock, dimGrid
//     form the execution configuration of the launch.
//
// A kernel launch does not return an error code. If the configuration is
// rejected, the kernel simply never runs and nothing inside it (including its
// printf calls) produces output. The launch status is therefore queried with
// cudaGetLastError() and reported on stderr instead of being silently dropped.
// The function still returns normally so that existing callers are unaffected.
void gpu_raytracer(float *radiologicalPath, short int *rho,
                   int gridX, unsigned int blockSize_x, unsigned int blockSize_y,
                   unsigned int offset_x, unsigned int offset_y, dim3 dimBlock, dim3 dimGrid)
{
    raytracer_kernel<<<dimGrid, dimBlock>>>(radiologicalPath, rho,
                                            gridX, blockSize_x, blockSize_y,
                                            offset_x, offset_y);

    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        fprintf(stderr, "gpu_raytracer: kernel launch failed: %s\n",
                cudaGetErrorString(launchStatus));
    }
}
raytracer_kernel.cu in turn calls a function in compute_RD_shell.cu, which calls some functions in computeRD.cu.
If I compile these files with nvcc -c -deviceemu
and then combine all the object files into a .a archive with ar,
everything appears to work fine.
But when I then build my TestRayTracer.C file and link it against the right libraries, I get no errors, yet none of the printf() output is shown either.
I hope someone can comment on this strange behavior.
Thanks,
Jordy