Error using Streams

Hi guys, I am trying to write a program that sums two matrices using CUDA streams, but I’m getting a couple of errors when I try to compile:

First of all, I have the following error:

./matrixSum_kernel.cu(59): error: expected a “)”

and I’m pretty sure I have closed all the brackets

and the second one is:

matrixSum.cu(151): error: too many arguments in function call

but you can check in the code that the number of arguments is right

This is the definition of the streams in the file matrixSum.cu:

[codebox]#define NUM_STREAMS 2

128

129 int i;

130 cudaStream_t stream[NUM_STREAMS];

131

132 for (i=0; i<NUM_STREAMS ; ++i)

133 cudaStreamCreate(&stream[i]);

134

135 for (i=0 ; i<NUM_STREAMS ; ++i) {

136 cudaMemcpyAsync(d_A + i*wA*BLOCK_SIZE, h_A + i*wA*BLOCK_SIZE,wA*BLOCK_SIZE*sizeof(float),cudaMemcpyHostToDevice, stream[i]);

137 cudaMemcpyAsync(d_B + i*wA*BLOCK_SIZE, h_B + i*wA*BLOCK_SIZE,wA*BLOCK_SIZE*sizeof(float),cudaMemcpyHostToDevice, stream[i]);

138 }

139

140 // create and start timer

141 unsigned int timer = 0;

142 CUT_SAFE_CALL(cutCreateTimer(&timer));

143 CUT_SAFE_CALL(cutStartTimer(timer));

144

145 // setup execution parameters

146 dim3 threads(BLOCK_SIZE, BLOCK_SIZE);

147 dim3 grid(wA / threads.x, wA / threads.y);

148

149 for (i=0 ; i<NUM_STREAMS ; ++i) {

150 // execute the kernel

151 matrixSum<<<grid, threads ,0, *stream>>>(d_C, d_A, d_B, wA, wA);

152

153 // check if kernel execution generated an error

154 CUT_CHECK_ERROR("Kernel execution failed");

      // stop and destroy timer

158 CUT_SAFE_CALL(cutStopTimer(timer));

161 printf("Processing time: %f (ms) \n", cutGetTimerValue(timer));

163 CUT_SAFE_CALL(cutDeleteTimer(timer));

164

165

166 for (i=0 ; i<NUM_STREAMS ; ++i)

167 cudaMemcpyAsync(h_C +i*BLOCK_SIZE, d_C + i*BLOCK_SIZE, wA*BLOCK_SIZE*sizeof(float), cudaMemcpyDeviceToHost, stream[i]);

168

169 cudaThreadSynchronize();

155 }

[/codebox]

and this is the definition of the kernel in the file matrixSum_kernel.cu

[codebox]__global__ void

59 matrixSum( float* C, float* A, float* B, int wA, int wB)

60 {[/codebox]

I would appreciate any help you could offer to solve these problems, as I don’t know whether they come from the stream definition or from a mistake I’m missing.

Thanks