Error shown when launching a kernel function in CUDA 5.5

I installed CUDA 5.5.
My development environment is Visual Studio 2010.
I tried to build and run the source code shown below.
However, for some reason a red underline is drawn under the “<<<” part of the kernel launch.
The error message displayed is “expression required”.
If anyone has run into the same problem, please tell me how to solve it.
Thank you.

–Development environment--------------------------------------------------------------------------
OS:Windows7 64bit
Visual Studio 2010 Professional SP1
CUDA 5.5

–Editor screenshot-----------------------------------------------------------------------------------

↓ This is line 46 of the source code.


https://drive.google.com/?tab=mo&authuser=0#folders/0Bww_cnImeeZkbmItbDZieUdaQWM


#include <cuda_runtime.h>
#include <stdio.h> 
#include <math.h> 
#include <cuda.h> 

#define N 256

/*
 * Matrix-vector product: A_d[row] = sum over col of B_d[row*N + col] * C_d[col].
 *
 * A_d : output vector, N floats (every element is overwritten).
 * B_d : input N x N matrix, indexed row-major as B_d[row*N + col].
 * C_d : input vector, N floats.
 *
 * Launch layout: 1-D grid of 1-D blocks. Rows are distributed over the
 * launched threads with a stride of gridDim.x*blockDim.x, so any launch size
 * works; with <<<1,1>>> the single thread walks all N rows in order.
 * No shared memory is used.
 */
__global__ void matrix_vector_multi_gpu_1_1(float *A_d, float *B_d, float *C_d){
	int row,col;
	int first=blockIdx.x*blockDim.x+threadIdx.x;
	int stride=gridDim.x*blockDim.x;

	/* Each row is owned by exactly one x-index, so threads never
	   accumulate into the same output element. */
	for(row=first;row<N;row+=stride){
		/* Accumulate locally and store once instead of doing a
		   read-modify-write of A_d[row] on every inner iteration. */
		float sum=0.0F;
		for(col=0;col<N;col++){
			sum=sum+B_d[row*N+col]*C_d[col];
		}
		A_d[row]=sum;
	}
}

/* Prints a diagnostic to stderr when a CUDA runtime call did not succeed.
 * Returns 1 on failure and 0 on success so the caller can branch on it. */
static int cuda_call_failed(cudaError_t status, const char *what){
	if(status==cudaSuccess){
		return 0;
	}
	fprintf(stderr,"CUDA error in %s: %s\n",what,cudaGetErrorString(status));
	return 1;
}

/*
 * Host driver: fills an N x N matrix B (row j holds j/256) and a vector C of
 * ones, computes A = B * C on the device with a single-thread launch, prints
 * A, waits for a key press, and releases the device buffers.
 *
 * Returns 0 on success and 1 if any CUDA call fails.
 */
int main(){
	int i,j;
	int exit_code=1;
	float A[N], B[N*N], C[N];
	/* Start as NULL so the cleanup path can free them unconditionally. */
	float *A_d=NULL, *B_d=NULL, *C_d=NULL;

	dim3 blocks(1,1,1);
	dim3 threads(1,1,1);

	for(j=0;j<N;j++){
		for(i=0;i<N;i++){
			/* Float literal keeps the division in single precision. */
			B[j*N+i]=((float)j)/256.0F;
		}
	}

	for(j=0;j<N;j++){
		C[j]=1.0F;
	}

	if(cuda_call_failed(cudaMalloc((void**)&A_d, N*sizeof(float)),"cudaMalloc(A_d)")) goto cleanup;
	if(cuda_call_failed(cudaMalloc((void**)&B_d, N*N*sizeof(float)),"cudaMalloc(B_d)")) goto cleanup;
	if(cuda_call_failed(cudaMalloc((void**)&C_d, N*sizeof(float)),"cudaMalloc(C_d)")) goto cleanup;

	/* A is output-only: the kernel writes every element of A_d, so the
	   uninitialized host array is not uploaded. */
	if(cuda_call_failed(cudaMemcpy(B_d,B,N*N*sizeof(float),cudaMemcpyHostToDevice),"cudaMemcpy(B_d <- B)")) goto cleanup;
	if(cuda_call_failed(cudaMemcpy(C_d,C,N*sizeof(float),cudaMemcpyHostToDevice),"cudaMemcpy(C_d <- C)")) goto cleanup;

	/* NOTE(review): an editor red underline on "<<<" is most likely the IDE's
	   C++ parser not understanding CUDA launch syntax rather than a real
	   compiler error -- confirm by checking the nvcc build output. */
	matrix_vector_multi_gpu_1_1<<<blocks,threads>>>(A_d,B_d,C_d);
	/* A launch returns no status itself: query the launch error first, then
	   synchronize so faults raised while the kernel runs are reported here. */
	if(cuda_call_failed(cudaGetLastError(),"kernel launch")) goto cleanup;
	if(cuda_call_failed(cudaDeviceSynchronize(),"kernel execution")) goto cleanup;

	/* The destination is host memory, so the direction must be DeviceToHost. */
	if(cuda_call_failed(cudaMemcpy(A,A_d,N*sizeof(float),cudaMemcpyDeviceToHost),"cudaMemcpy(A <- A_d)")) goto cleanup;

	for(j=0;j<N;j++){
		printf("A[ %d ]=%f \n",j,A[j]);
	}
	exit_code=0;

cleanup:
	/* Wait for a key press so the console output (results or error message)
	   can be read before the program exits. */
	getchar();
	cudaFree(A_d);
	cudaFree(B_d);
	cudaFree(C_d);
	return exit_code;
}