Hello Im using 9800GT on Windows7 OS. The following code says, I have a invalid kernel image at line 77 of gpu_anim.h. gpu_anim.h is a header file which displays image using data directly from gpu. This code was provided from NVIDIA and I have never touched anything on such headerfile. Does anyone recognize the problem? Thank you.
// Test program for animated display of oct .unp data files
// Has gpu computation and direct rendering of results via OpenGL
// compile with: nvcc -Llib -m32 src\octprocessor.cu -o src\octprocessor.exe
#define SWAP(a,b) tempr=(a);(a)=(b);(b)=tempr
#include "../common/book.h"
#include "../common/gpu_anim.h"
#include "device_fourier.h"
#define XDIM 1000
#define YDIM 1024
#define FRAMES 50
// Parameters from Coulter Cornea SDOCT system
#define START_WAVELENGTH 7.5727E+02
#define WAVELENGTH_SPACING 8.6911E-02
#define SECOND_ORDER_CORRECTION -5.7555E-06
#define THIRD_ORDER_CORRECTION 2.2916E-9
#define FOURTH_ORDER_CORRECTION -5.6882E-13
#define CAPTURE_LINE_OFFSET 450
#define RESAMPLE_POINTS 1024
#define D1 3.3e-5
unsigned short a[XDIM*YDIM*FRAMES]; // raw input data from file or device
int b[XDIM*YDIM*FRAMES]; // DC subtracted, integer input data sent to gpu (real)
int c[2*XDIM*YDIM*FRAMES]; // processed data returned from gpu (complex)
int sum_spectrum[YDIM];
float resamp[YDIM*2];
float k_resampledspacing;
int *dev_b, *dev_c, *dev_sum_spectrum;
float *dev_resamp;
char inputfile[] = "joecornearadialvolume1.unp";
bool dcsubtract = true;
bool fft = true;
bool resample = true;
void calc_resample_coefficients() {
double lambda[YDIM];
double k_sampled[YDIM];
double k_resampled[YDIM];
double lambda0 = START_WAVELENGTH;
double lambda1 = WAVELENGTH_SPACING;
double lambda2 = SECOND_ORDER_CORRECTION;
double lambda3 = THIRD_ORDER_CORRECTION;
double lambda4 = FOURTH_ORDER_CORRECTION;
double kmax;
double kmin;
for (int y=0; y<YDIM; y++) {
lambda[y] = lambda0 + lambda1*(y+CAPTURE_LINE_OFFSET) +
lambda2*pow(double(y+CAPTURE_LINE_OFFSET), 2) +
lambda3*pow(double(y+CAPTURE_LINE_OFFSET), 3) +
lambda4*pow(double(y+CAPTURE_LINE_OFFSET), 4);
k_sampled[y] = 1000000./lambda[y];
}
kmin = 1000000./lambda[0]; //wavenumber in mm^-1
kmax = 1000000./lambda[YDIM-1];
k_resampledspacing = (kmin - kmax)/(YDIM-1);
for (int y=0; y<YDIM; y++) {
k_resampled[y] = kmin + (double(y)/(YDIM - 1))*(kmax - kmin);
// if (y%1 == 0) printf ("lambda[%d]=%f, k_sampled[%d]=%f, k_resampled[%d]=%f\n",y,lambda[y],y,k_sampled[y],y,k_resampled[y]);
}
for (int y=0; y<YDIM; y++) {
for (int i=0; i<YDIM; i++) {
if (k_resampled[y] >= k_sampled[i] && k_resampled[y+1] < k_sampled[i]) {
resamp[y+0*YDIM] = double(i);
resamp[y+1*YDIM] = k_resampled[y] - k_sampled[i];
break;
}
}
}
}
void readData( ) {
FILE *fp;
long size;
fp=fopen(inputfile, "rb");
if (fp==NULL) perror ("Error opening file");
else {
fseek (fp, 0, SEEK_END);
size=ftell (fp);
printf ("Size of inputfile: %ld bytes.\n",size);
rewind (fp);
}
fread(a, 2, XDIM*YDIM*FRAMES, fp);
fclose(fp);
for (int f=0; f<FRAMES; f++) {
for (int x=0; x<XDIM; x++) {
for (int y=0; y<YDIM; y++) {
b[y+x*YDIM+f*YDIM*XDIM] = (int) a[y+x*YDIM+f*YDIM*XDIM];
}
}
}
}
void calc_sum_spectrum(int f) { //should move this onto gpu but would require thread synchronization
if (dcsubtract == true) { // Do DC subtract
for (int y=0; y<YDIM; y++) { sum_spectrum[y]=0;}
for (int x=0; x<XDIM; x++) {
for (int y=0; y<YDIM; y++) {
sum_spectrum[y] += b[y+x*YDIM+f*YDIM*XDIM];
}
}
}
}
__global__ void kernel( int *b, int *c, int *sum_spectrum, float *resamp, bool dcsubtract, bool resample, bool fft, float k_resampledspacing, uchar4 *ptr ){
int x = blockIdx.x*XDIM/2 + threadIdx.x; // XDIM>>1 = XDIM/2
int i, offset;
float data[2*YDIM];
float phase, temp, datatemp;
if (fft == true) { // Do FFT processing
for (int y=0; y<YDIM; y++) {
if (dcsubtract == true) { b[y+x*YDIM] -= sum_spectrum[y]/XDIM;} // Do DC subtract
if (resample == true) { // Do resampling
i = int(resamp[y+0*YDIM]);
datatemp = b[x*YDIM+i] - resamp[y+1*YDIM]*(b[x*YDIM+i+1]-b[x*YDIM+i])/k_resampledspacing;
}
else {
datatemp = b[x*YDIM+y];
}
phase = D1*(y-YDIM/2)*(y-YDIM/2); // second order dispersion correction
data[2*y] = datatemp * __cosf(phase);
data[2*y+1] = datatemp * __sinf(phase);
}
four1(data, YDIM, 1);
for (int y=0; y<YDIM/2; y++) {
c[x*2*YDIM+2*y] = data[2*y];
c[x*2*YDIM+2*y+1] = data[2*y+1];
temp = __log10f(data[2*y]*data[2*y] + data[2*y+1]*data[2*y+1])/12.;
offset = x + (YDIM/2 - y) * XDIM;
ptr[offset].x = 255 * temp - 100;
ptr[offset].y = 255 * temp - 100;
ptr[offset].z = 255 * temp - 100;
ptr[offset].w = 255;
}
}
else { // Don't do FFT processing
for (int y=0; y<YDIM; y++) {
if (dcsubtract == true) {b[y+x*YDIM] -= sum_spectrum[y]/YDIM;} // Do DC subtract
int offset = x + y * XDIM;
ptr[offset].x = 255 * (float)b[x*YDIM + y]/(65535);
ptr[offset].y = 255 * (float)b[x*YDIM + y]/(65535);
ptr[offset].z = 255 * (float)b[x*YDIM + y]/(65535);
ptr[offset].w = 255;
}
}
}
double get_framerate ( ) {
static clock_t newticks, oldticks;
double framerate;
newticks = clock( );
framerate = CLOCKS_PER_SEC/( newticks - oldticks );
oldticks = newticks;
return framerate;
}
void generate_frame( uchar4 *pixels, void*, int ticks) {
// Note ticks variable generated by cpu_anim.h not used
// dim3 grid(XDIM);
static int frame;
frame++;
if ( frame >= FRAMES ) frame = 1;
calc_sum_spectrum(frame);
HANDLE_ERROR( cudaMemcpy( dev_b, b + frame*XDIM*YDIM, XDIM*YDIM*sizeof(int), cudaMemcpyHostToDevice ) );
HANDLE_ERROR( cudaMemcpy( dev_sum_spectrum, sum_spectrum, YDIM*sizeof(int), cudaMemcpyHostToDevice ) );
kernel<<<2,XDIM/2>>>( dev_b, dev_c, dev_sum_spectrum, dev_resamp, dcsubtract, resample, fft, k_resampledspacing, pixels );
HANDLE_ERROR( cudaMemcpy( c + frame*XDIM*2*YDIM, dev_b, XDIM*YDIM*sizeof(int), cudaMemcpyDeviceToHost ) );
printf ("Frame: %3d; Frames/sec: %4.1f\r", frame, get_framerate ( ) );
}
int main(int argc, char* argv[]) {
GPUAnimBitmap bitmap( XDIM, YDIM/2, NULL );
printf("Usage: no arguments for default values or add -f inputfile for input file or\n");
printf("-nodcsubtract, -nofft, or -noresample to disable any of these functions.\n");
printf("argc = %d, argv[0] = %s, argv[1] = %s\n",argc, argv[0], argv[1]);
for (int i = 1; i < argc; i++) { //Iterate over argv[] to get the parameters stored inside.
if (strcmp(argv[i],"-f") == 0) { // Next argument is filename to process
strcpy(inputfile,argv[i + 1]); }
if (strcmp(argv[i],"-nodcsubtract") == 0) {
dcsubtract = false;
printf("No DC subtraction.\n"); }
else if (strcmp(argv[i],"-nofft") == 0) {
fft = false;
printf("No fft.\n"); }
else if (strcmp(argv[i],"-noresample") == 0) {
resample = false;
printf("No resampling.\n"); }
}
calc_resample_coefficients();
HANDLE_ERROR( cudaMalloc( (void**)&dev_b, XDIM*YDIM*sizeof(int) ) );
HANDLE_ERROR( cudaMalloc( (void**)&dev_c, 2*XDIM*YDIM*sizeof(int) ) );
HANDLE_ERROR( cudaMalloc( (void**)&dev_sum_spectrum, YDIM*sizeof(int) ) );
HANDLE_ERROR( cudaMalloc( (void**)&dev_resamp, 2*YDIM*sizeof(float) ) );
HANDLE_ERROR( cudaMemcpy( dev_resamp, resamp, 2*YDIM*sizeof(float), cudaMemcpyHostToDevice ) );
readData( );
bitmap.anim_and_exit( (void (*)(uchar4*, void*, int))generate_frame, NULL );
}