Compilation problem: undefined reference to...

Hi, i’m having this problem:

]$ make

/opt/lib/cuda/bin/nvcc --ptxas-options=-v -I src/ -c src/clock.c -o build/clock.o

/opt/lib/cuda/bin/nvcc --ptxas-options=-v -I src/ -c src/matMul.cu -o build/matMul.o

ptxas info : Compiling entry function ‘__globfunc__Z6matMulPfS_S_i’

ptxas info : Used 9 registers, 44+40 bytes smem, 4 bytes cmem[1]

/opt/lib/cuda/bin/nvcc --ptxas-options=-v -o bin/matMul build/matMul.o build/clock.o

build/clock.o(.text+0x1b): In function `ovhd’:

: undefined reference to `start_counter’

build/clock.o(.text+0x25): In function `ovhd’:

: undefined reference to `get_counter’

build/clock.o(.text+0x61): In function `mhz’:

: undefined reference to `start_counter’

build/clock.o(.text+0x73): In function `mhz’:

: undefined reference to `get_counter’

build/clock.o(.text+0x11f): In function `callibrate’:

: undefined reference to `start_counter’

build/clock.o(.text+0x129): In function `callibrate’:

: undefined reference to `get_counter’

build/clock.o(.text+0x14a): In function `callibrate’:

: undefined reference to `get_counter’

build/clock.o(.text+0x288): In function `start_comp_counter’:

: undefined reference to `start_counter’

build/clock.o(.text+0x29c): In function `get_comp_counter’:

: undefined reference to `get_counter’

collect2: ld returned 1 exit status

make: *** [matMul] Error 255

Does anyone knows what the problem is?

Makefile

[codebox]# source files.

SRC1 = matMul.cu

SRC2 = clock.c

OUT = matMul

include directories

INCLUDES = -I src/

#compilers

GCC = g++

CCC = /opt/lib/cuda/bin/nvcc --ptxas-options=-v

default: $(OUT)

$(OUT):

$(CCC) $(INCLUDES) -c src/$(SRC2) -o build/clock.o

$(CCC) $(INCLUDES) -c src/$(SRC1) -o build/matMul.o

$(CCC) -o bin/$(OUT) build/matMul.o  build/clock.o 

clean:

rm -f build/*.o bin/* [/codebox]

matMul.cu

[codebox]

#include <stdio.h>

#include “clock.h”

global void matMul(float *A, float *B, float *C,int SIZE){

int row=threadIdx.x;

int line=threadIdx.y;

//float Cvalue = 0;

for(int k=0; k<SIZE; ++k)

{

	C[line * SIZE + row] += A[line * SIZE + k] * B[k * SIZE + row];

	//float Adelement=A[line * A.with + k];

	//float Bdelement=B[k * B.width + row];

	//Cvalue += Adelement * Bdelement;

}

//C[line * SIZE + row] = Cvalue;

}

int main(){

//double 	ticks=0, ticks_before=0;

int SIZE = 1024;

float *Ah = (float*)malloc(SIZE*SIZE*sizeof(float));

float *Bh = (float*)malloc(SIZE*SIZE*sizeof(float));

float *Ch = (float*)malloc(SIZE*SIZE*sizeof(float));

memset(Ah,0,SIZE*SIZE*sizeof(float));

memset(Bh,0,SIZE*SIZE*sizeof(float));

memset(Ch,0,SIZE*SIZE*sizeof(float));

float *Ad;

float *Bd;

float *Cd;

cudaMalloc((void**)&Ad,SIZE*SIZE*sizeof(float));

cudaMalloc((void**)&Bd,SIZE*SIZE*sizeof(float));

cudaMalloc((void**)&Cd,SIZE*SIZE*sizeof(float));

dim3 blockSize = dim3(16*16);

dim3 gridSize = dim3(SIZE/blockSize.x, SIZE/blockSize.y);

cudaMemcpy(Ad, Ah, SIZE*SIZE*sizeof(float), cudaMemcpyHostToDevice);

cudaMemcpy(Bd, Bh, SIZE*SIZE*sizeof(float), cudaMemcpyHostToDevice);

cudaMemcpy(Cd, Ch, SIZE*SIZE*sizeof(float), cudaMemcpyHostToDevice);

//ticks_before = get_counter();	

matMul<<<gridSize, blockSize>>>(Ad, Bd, Cd, SIZE);



//ticks = get_counter();

//printf("%f;     ", (ticks-ticks_before) / (SIZE*SIZE*SIZE));	

cudaMemcpy(Ch,Cd,SIZE*SIZE*sizeof(float), cudaMemcpyDeviceToHost);

printf("Done!\n");

return (0);

}[/codebox]

clock.c

[codebox]#include <stdio.h>

#include <stdlib.h>

#include <unistd.h>

#include <sys/times.h>

#include “clock.h”

/*

  • Routines for using the cycle counter

*/

/* Detect whether running on Alpha */

#ifdef __alpha

#define IS_ALPHA 1

#else

#define IS_ALPHA 0

#endif

/* Detect whether running on x86 */

#ifdef i386

#define IS_x86 1

#else

#define IS_x86 0

#endif

#if IS_ALPHA

/* Initialize the cycle counter */

static unsigned cyc_hi = 0;

static unsigned cyc_lo = 0;

/* Use Alpha cycle timer to compute cycles. Then use

measured clock speed to compute seconds

*/

/*

  • counterRoutine is an array of Alpha instructions to access

  • the Alpha’s processor cycle counter. It uses the rpcc

  • instruction to access the counter. This 64 bit register is

  • divided into two parts. The lower 32 bits are the cycles

  • used by the current process. The upper 32 bits are wall

  • clock cycles. These instructions read the counter, and

  • convert the lower 32 bits into an unsigned int - this is the

  • user space counter value.

  • NOTE: The counter has a very limited time span. With a

  • 450MhZ clock the counter can time things for about 9

  • seconds. */

static unsigned int counterRoutine =

{

0x601fc000u,

0x401f0000u,

0x6bfa8001u

};

/* Cast the above instructions into a function. */

static unsigned int (*counter)(void)= (void *)counterRoutine;

void start_counter()

{

/* Get cycle counter */

cyc_hi = 0;

cyc_lo = counter();

}

double get_counter()

{

unsigned ncyc_hi, ncyc_lo;

unsigned hi, lo, borrow;

double result;

ncyc_lo = counter();

ncyc_hi = 0;

lo = ncyc_lo - cyc_lo;

borrow = lo > ncyc_lo;

hi = ncyc_hi - cyc_hi - borrow;

result = (double) hi * (1 << 30) * 4 + lo;

if (result < 0) {

	fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result);

}

return result;

}

#endif /* Alpha */

#if IS_x86

/* $begin x86cyclecounter */

/* Initialize the cycle counter */

static unsigned cyc_hi = 0;

static unsigned cyc_lo = 0;

/* Set *hi and *lo to the high and low order bits of the cycle counter.

Implementation requires assembly code to use the rdtsc instruction. */

void access_counter(unsigned *hi, unsigned *lo)

{

asm("rdtsc; movl %%edx,%0; movl %%eax,%1"   /* Read cycle counter */

	: "=r" (*hi), "=r" (*lo)                /* and move results to */

	: /* No input */                        /* the two outputs */

	: "%edx", "%eax");

}

/* Record the current value of the cycle counter. */

void start_counter()

{

access_counter(&cyc_hi, &cyc_lo);

}

/* Return the number of cycles since the last call to start_counter. */

double get_counter()

{

unsigned ncyc_hi, ncyc_lo;

unsigned hi, lo, borrow;

double result;



/* Get cycle counter */

access_counter(&ncyc_hi, &ncyc_lo);



/* Do double precision subtraction */

lo = ncyc_lo - cyc_lo;

borrow = lo > ncyc_lo;

hi = ncyc_hi - cyc_hi - borrow;

result = (double) hi * (1 << 30) * 4 + lo;

if (result < 0) {

	fprintf(stderr, "Error: counter returns neg value: %.0f\n", result);

}

return result;

}

/* $end x86cyclecounter */

#endif /* x86 */

double ovhd()

{

/* Do it twice to eliminate cache effects */

int i;

double result;



for (i = 0; i < 2; i++) {

	start_counter();

	result = get_counter();

}

return result;

}

/* $begin mhz */

/* Estimate the clock rate by measuring the cycles that elapse */

/* while sleeping for sleeptime seconds */

double mhz(int verbose, int sleeptime)

{

double rate;



start_counter();

sleep(sleeptime);

rate = get_counter() / (1e6*sleeptime);

if (verbose) 

	printf("Processor clock rate ~= %.1f MHz\n", rate);

return rate;

}

/* $end mhz */

/* Version using a default sleeptime */

double mhz_def(int verbose)

{

return mhz(verbose, 2);

}

/** Special counters that compensate for timer interrupt overhead */

static double cyc_per_tick = 0.0;

#define NEVENT 100

#define THRESHOLD 1000

#define RECORDTHRESH 3000

/* Attempt to see how much time is used by timer interrupt */

static void callibrate(int verbose)

{

double oldt;

struct tms t;

clock_t oldc;

int e = 0;



times(&t);

oldc = t.tms_utime;

start_counter();

oldt = get_counter();

while (e <NEVENT) {

	double newt = get_counter();

	

	if (newt-oldt >= THRESHOLD) {

		clock_t newc;

		times(&t);

		newc = t.tms_utime;

		if (newc > oldc) {

			double cpt = (newt-oldt)/(newc-oldc);

			if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH)

				cyc_per_tick = cpt;

			/*

			 if (verbose)

			 printf("Saw event lasting %.0f cycles and %d ticks.  Ratio = %f\n",

			 newt-oldt, (int) (newc-oldc), cpt);

			 */

			e++;

			oldc = newc;

		}

		oldt = newt;

	}

}

if (verbose)

	printf("Setting cyc_per_tick to %f\n", cyc_per_tick);

}

static clock_t start_tick = 0;

void start_comp_counter()

{

struct tms t;



if (cyc_per_tick == 0.0)

	callibrate(1);

times(&t);

start_tick = t.tms_utime;

start_counter();

}

double get_comp_counter()

{

double time = get_counter();

double ctime;

struct tms t;

clock_t ticks;



times(&t);

ticks = t.tms_utime - start_tick;

ctime = time - ticks*cyc_per_tick;

/*

 printf("Measured %.0f cycles.  Ticks = %d.  Corrected %.0f cycles\n",

 time, (int) ticks, ctime);

 */

return ctime;

}

[/codebox]

Thanks