problem compiling cuda code HELP

hello ladies and gents

i have this code (program if you like):

[codebox]///////////////////////////////////////////////////////////////////////////////

// Name: qpsk2D.cu

// Copyright: xxx

// Author: xxx

// Date: xxx

// Description:

///////////////////////////////////////////////////////////////////////////////

include <stdlib.h>

include <stdio.h>

include <string.h>

include <math.h>

include <cuda.h>

include <cuda_runtime.h>

include <cutil.h>

include <cutil_inline.h>

//include “kernel_qpsk2D.cu”

//include “structures.h”

///////////////////////////////////////////////////////////////////////////////

// Name: structures.h

// Copyright: xxx

// Author: xxx

// Date: xxx

// Description: Declaration of global types

///////////////////////////////////////////////////////////////////////////////

#ifndef STRUCTURES_H

define STRUCTURES_H

///////////////////////////////////////////////////////////////////////////////

// Global types

///////////////////////////////////////////////////////////////////////////////

typedef struct {

float real;

float imag;

} Complex;

endif

///////////////////////////////////////////////////////////////////////////////

// Name: kernel_qpsk2D.cu

// Copyright: xxx

// Author: xxx

// Date: xxx

// Description:

///////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////

// Kernel that performs QPSK 2D mapping in global memory

////////////////////////////////////////////////////////////////////////////////

global void qpsk2DGlobal( float *d_a, Complex *Lookup_d, Complex *d_b, int N ) {

// compute index in NxN array

int ix = blockIdx.x * blockDim.x + threadIdx.x;

int iy = blockIdx.y * blockDim.y + threadIdx.y;

// // create Lookup in shared memory

// float Lookup[ 4 ] = { -sqrt( 2.0 ), sqrt( 2.0 ), -sqrt( 2.0 ), sqrt( 2.0 ) };

// perform transformation

if ( ix < N && iy < N) {

  int idx = ix + iy * N;

  d_b[ idx ].real = Lookup_d[ d_a[ idx ] ].real;

  d_b[ idx ].real = Lookup_d[ d_a[ idx ] ].imag;

} // end if

} // end qpsk2DGlobal

///////////////////////////////////////////////////////////////////////////////

// Main program

///////////////////////////////////////////////////////////////////////////////

define DO_CPU

//#undef DO_CPU

define DO_GPU

//#undef DO_GPU

int main ( int argc, char **argv ) {

printf( “QPSK program for a 2D matrix…\n” );

printf( “===============================\n” );

////////////////////////////////////////////////////////////////////////////

// Create timers

////////////////////////////////////////////////////////////////////////////

printf( “Initialising timers…\n” );

unsigned int timer1, timer2, timer3, timer4 = 0;

cutCreateTimer( &timer1 ); // CPU timer

cutCreateTimer( &timer2 ); // GPU timer

cutCreateTimer( &timer3 ); // cudaMemcpy timer 1

cutCreateTimer( &timer4 ); // cudaMemcpy timer 2

////////////////////////////////////////////////////////////////////////////

// Initialise memory

////////////////////////////////////////////////////////////////////////////

printf( “Initialising memory…\n” );

const int N = 8; // make a big array with N elements

// float a_h[ N ][ N ]; // host data

// Complex b_h[ N ][ N ];

float *a_h[ N ]; // host data

Complex *b_h[ N ], *Lookup; // host data structure

a_h = ( float * )malloc( sizeof( float )NN ); // allocate array on host

b_h = ( Complex * )malloc( sizeof( Complex )NN ); // allocate array on host

Lookup = ( Complex * )malloc( sizeof( Complex )*4 );

// float *a_d, *b_d; // device data

float *a_d; // device data

Complex *b_d, *Lookup_d; // device data structure

cudaMallocHost( ( void ** ) &a_d, sizeof( float )NN ); // allocate array on device

cudaMallocHost( ( void ** ) &b_d, sizeof( Complex )NN ); // allocate array on device

cudaMallocHost( ( void ** ) &Lookup_d, sizeof( Complex )*4 ); // allocate array on device

////////////////////////////////////////////////////////////////////////////

// Initialize host array

////////////////////////////////////////////////////////////////////////////

printf( “Initiaising data matrix…\n” );

unsigned int data = 0xf01919c6; // randome 4byte worth of data

unsigned int mask = 0x00000003; // bit mask

unsigned int temp = 0x00000000; // temporary storage

for ( int i = 0; i < N; i++ ) {

  for ( int j = 0; j < N; j++ ) {

     temp = data & mask;

     a_h[ i ][ j ] = ( int )temp;

     data = data >> 2;

  } // end for

} // end for

////////////////////////////////////////////////////////////////////////////

// Initialise lookup table

////////////////////////////////////////////////////////////////////////////

Lookup[ 0 ].real = -sqrt( 2.0 ); Lookup[ 0 ].imag = -sqrt( 2.0 );

Lookup[ 1 ].real = sqrt( 2.0 ); Lookup[ 1 ].imag = -sqrt( 2.0 );

Lookup[ 3 ].real = sqrt( 2.0 ); Lookup[ 3 ].imag = sqrt( 2.0 );

Lookup[ 2 ].real = -sqrt( 2.0 ); Lookup[ 2 ].imag = sqrt( 2.0 );

cudaMemcpy( Lookup_d, Lookup, sizeof( Complex )*4, cudaMemcpyHostToDevice );

// float Lookup[ 4 ] = { -sqrt( 2.0 ), sqrt( 2.0 ), -sqrt( 2.0 ), sqrt( 2.0 ) };

ifdef DO_CPU

////////////////////////////////////////////////////////////////////////////

// Do calculation on host

////////////////////////////////////////////////////////////////////////////

printf( “Performing QPSK 2D operation on CPU…\n” );

cutStartTimer( timer1 ); // start CPU timer

for ( int i = 0; i < N; i++ ) {

  for ( int j = 0; j < N; j++ ) {

     b_h[ i ][ j ] = Lookup[ ( int )a_h[ i ][ j ] ];

  } // end for

} // end for

cutStopTimer( timer1 ); // stop CPU timer

endif

ifdef DO_GPU

////////////////////////////////////////////////////////////////////////////

// Do calculation on device

////////////////////////////////////////////////////////////////////////////

printf( “Performing QPSK 2D operation on GPU…\n” );

dim3 dimBlock( 2, 2 ); // # of threads per 2D block

//int n_blocks = N / block_size + ( N%block_size == 0 ? 0:1 ); // # of blocks per 1D grid

dim3 dimGrid( 4, 4 ); // # of blocks per 2D grid

cutStartTimer( timer2 ); // start GPU timer

printf( "Performing cuda memory copy operation… " );

cutStartTimer( timer3 ); // start cudaMemcpy timer 1

cudaMemcpy( a_d, a_h, sizeof( float )NN, cudaMemcpyHostToDevice );

cutStopTimer( timer3 ); // stop cudaMemcpy timer 1

printf( " %f ms\n", cutGetTimerValue( timer3 ) );

//qpsk <<< n_blocks, block_size >>> ( a_d, N );

qpsk2DGlobal <<< dimGrid, dimBlock >>> ( a_d, Lookup_d, b_d, N );

//qpsk2DShared <<< dim3( 16, 16 ), block_size, 4 * sizeof( float ) >>> ( a_d, N );

////////////////////////////////////////////////////////////////////////////

// Retrieve result from device and store it in host array

////////////////////////////////////////////////////////////////////////////

printf( "Performing memory copy operation… " );

cutStartTimer( timer4 ); // start cudaMemcpy timer 2

cudaMemcpy( b_h, a_d, sizeof( Complex )NN, cudaMemcpyDeviceToHost );

cutStopTimer( timer4 ); // stop cudaMemcpy timer 2

printf( " %f ms\n", cutGetTimerValue( timer4 ) );

cutStopTimer( timer2 ); // stop GPU timer

endif

////////////////////////////////////////////////////////////////////////////

// Print results

////////////////////////////////////////////////////////////////////////////

for ( int i = 0; i < N; i++ ) {

  for ( int j = 0; j < N; j++ ) {

     printf( "%2d.%2d %10f real:%10f imag:%10f\n", i, j, a_h[ i ][ j ], b_h[ i ][ j ].real, b_h[ i ][ j ].imag );

  } //end for

} // end for

printf( “CPU calculation time: %f ms\n”, cutGetTimerValue( timer1 ) );

printf( “GPU operation time: %f ms\n”, cutGetTimerValue( timer2 ) );

printf( “GPU calculation time: %f ms\n”, cutGetTimerValue( timer2 )-cutGetTimerValue( timer3 )-cutGetTimerValue( timer4 ) );

printf( “Lookup Table:\n” );

// for ( int i = 0; i < 4; i++ ) printf( “%d %10f\n”, i, Lookup[ i ] );

for ( int i = 0; i < N; i++ ) printf( “%2d real:%10f imag:%10f\n”, i, Lookup[ i ].real, Lookup[ i ].imag );

//printf( “# blocks: %d\n# threads per block: %d\n”, n_blocks, block_size );

////////////////////////////////////////////////////////////////////////////

// Cleanup

////////////////////////////////////////////////////////////////////////////

printf( “Cleanup memory…\n” );

free( a_h ); cudaFreeHost( a_d ); free( Lookup );

free( b_h ); cudaFreeHost( b_d ); cudaFreeHost( Lookup_d );

////////////////////////////////////////////////////////////////////////////

// Exit

////////////////////////////////////////////////////////////////////////////

printf( “Shutting down…\n” );

cutilExit( argc, argv );

} // end main

[/codebox]

which is giving me a headech:

i know you cant be bother counting lines and to be honest with you i dont know how to insert line numbers so here goes:

068: d_b[ idx ].real = Lookup_d[ d_a[ idx ] ].real;

069: d_b[ idx ].real = Lookup_d[ d_a[ idx ] ].imag;

106: float *a_h[ N ]; // host data

107: Complex *b_h[ N ], *Lookup; // host data structure

[list=1]

[*]i am pretty sure that thouse of you who are a bit better at programming are going to say to me that 106 & 107 is the result of my stupidity (inexprerience). so perhaps someone can show me how it is supposed to be done.

[*]i dont understand why lines 68 & 69 are giving me the trouble? so perhaps someone with a bit more knowledge can help me get this thing running, as i have been strugling with this code for about a week now, i managed to reduce errors to lines 68 & 69 but thats about how far my programming skills got me for now…

ps: dont mind my bad grammar and spelling

pss: all comments are WELCOME

psss: help is especially WELCOME

regards

:wacko:

For the first two errors: You can’t use float values as an index to an array, you have to cast it to an int or unsigned int first.
For the other errors: when working with 2D arrays I would suggest allocating them as a single float* and indexing them as
array[y_index*N+x_index].

N.

thx for your promp reply Nico

i knew it… the first two errors seemed a bit odd to me… now i know why.

thx you’re a life savier

should be…

float (*a_h)[][ N ];

Complex (*b_h)[][ N ], *Lookup;

See if this works…

Best of luck…

i am afraid it is no good. i get these errors

but thx for your suggestion nitin.life

thx Nico again this worked like a charm External Media