Red underline in CUDA for Visual studio 15

Hello,
I am getting intellisense errors in CUDA while compiling in VS 15. How can I compile without intellisense errors blocking the compilation?

If Intellisense is putting red underlines under CUDA-specific constructs, ignore it.

The errors showing in it are:

sum = (dev_a[th_numcols + c]-dev_a[kcols + c]); it says expression must have an integral or enum type

:distance<<< numBlocks, threadsPerBlock>>>(d_a,d_c);
sorting<<<numBlocks,threadsPerBlock>>>(sort, K); it says expected an expression

:time = clock()−start; it says expected an ;

It shows all are intellisense errors but I am not able to compile the code. It is stopping in between and showing the errors.

That is likely a real bug in your code (which you haven’t shown).

njuffa’s right. If it was just intellisense that was failing, your code would still compile. For the sake of clarity, you’re using the Nsight plug-in, correct?

But as a matter of other fun fact, VS sucks for CUDA. The best IDE I’ve found is Sublime Text with some awesome CUDA syntax stuff plugged in from I think Mark Harris, actually, who writes those parallelforall blogs.

I’ve also heard the ArrayFire people use like Vim/Emacs :P

#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <math.h>
#define rows 12000
#define cols 30

int aa[rows][cols];
int cc[rows][rows];
double K;
//distance//
global void distance(const double * dev_a, double * dev_c, double *k, dim3 thPerblk)
{
int th_num = blockIdx.x *128 + threadIdx.x;
double sum = 0;

for(double k = th_num + 1; k < rows; k++)

{
for (int c = 0; c < cols; c++)

sum = (dev_a[th_num*cols + c]-dev_a[k*cols + c]);

sum=
dev_c[th_num] = sqrt(sum);
}
// printf ("Sum: %d \t " , sum);
}
//sorting//
global void sorting(double *dev_c, double *sort, double K, dim3 thPerblk)
{

int temp; int i; i = blockIdx.x * 64 + threadIdx.x;

for (int r = 0; r < rows; r++)
{
for (int c = 0; c < cols; c++)
{
if(dev_c[colsr + c]<dev_c[cols i + c])

{
temp = dev_c[colsr + c];
dev_c[cols
r + c] = dev_c[cols* i + c];
dev_c[cols* i + c] = temp;
}
}
}
}
int main()
{
printf(" enter the number of K nearest neighbors :“);
scanf(”%d", &K);
FILE *myFile;
myFile = fopen(" test.csv ", “r “);
if(myFile == NULL)
{
printf(” Error Reading File\n”);
exit(0);
}

    char buffer[1024]; 
int i = 0, j = 0; 
char *record, *line; 

while ((line = fgets(buffer, sizeof(buffer), myFile)) != NULL) 
{

j = 0; 
record = strtok(line, " ,");
while (record != NULL) 
{ 
    // printf("%d \t %d \t %d \n" , ( cols * i ) + j , i , j ); 
    aa [ i ] [ j ] = atoi ( record ); 
    record = strtok (NULL, " ,"); 
    j++; 
} 
i++; 
}
fclose(myFile);
cudaError_t cudaStatus;
cudaStatus = cudaDeviceReset(); 

if(cudaStatus != cudaSuccess) 

{ 
    fprintf(stderr, "cudaDeviceReset failed !"); 
    return 1; 
}
cudaStatus=cudaSetDevice(0); 

if(cudaStatus != cudaSuccess) 
{
    fprintf(stderr, "cudaSetDevice failed ! Do you have a CUDA?");
        goto Error ; 
} 
else 
    printf ("Working \n");

        clock_t start;
        clock_t stop;
        clock_t time;
        start = clock();

    int *d_a = 0; 
    cudaStatus = cudaMalloc((void **)&d_a, rows*cols * sizeof(int)); 
    if(cudaStatus != cudaSuccess) 
    {
        fprintf(stderr, "cudaMalloc failed !"); 
        goto Error;

    }
 else
 printf(" Success ! ! ! \n");
 cudaStatus = cudaMemcpy(d_a, aa, rows*cols * sizeof(int *),      cudaMemcpyHostToDevice); 
 if(cudaStatus != cudaSuccess) 

 { 
     fprintf(stderr, "cudaMemcpy failed !"); 
     goto Error; 
 }
 else 
 printf(" Success ! ! ! \n");

 double *d_c = 0; 
 cudaStatus = cudaMalloc((void **)&d_c, rows* rows * sizeof(double)); 

 if(cudaStatus != cudaSuccess) 

 { 
 fprintf(stderr, "cudaMalloc failed !"); 
 goto Error; 
 }
  else 
 printf(" Success ! ! ! \n"); 

 double *sort = 0; 
 cudaStatus = cudaMalloc((void **)&sort, rows* rows * sizeof(double)); 

 if(cudaStatus != cudaSuccess) 

     { 
 fprintf(stderr, "cudaMalloc failed !"); 
 goto Error; 
 }
   else 
 printf(" Success ! ! ! \n");

 int threads = 64; 
 while (rows%threads != 0) 
 threads++;

 printf("TH: %d \n", threads); 
 //return 0;

 dim3 threadsPerBlock(threads); 
 dim3 numBlocks(rows / threadsPerBlock.x);




      distance<<< numBlocks, threadsPerBlock>>>(d_a,d_c); 
      sorting<<<numBlocks,threadsPerBlock>>>(sort, K);
      cudaStatus = cudaGetLastError();

     if(cudaStatus != cudaSuccess) 
     { 
    fprintf(stderr, "addKern launch failed : %s\n", cudaGetErrorString(cudaStatus)); 
    goto Error; 
    }

    cudaStatus = cudaDeviceSynchronize(); 

    if(cudaStatus != cudaSuccess) 
    { 
                 fprintf(stderr, "cudaDeviceSynchronize returned error code %d after   launching addKernel !\n", cudaStatus); 
                goto Error; 
               }
                 //return cudaStatus ; 
            cudaStatus = cudaMemcpy(cc , d_c, rows*rows * sizeof ( double ) ,  cudaMemcpyDeviceToHost ); 
             if(cudaStatus != cudaSuccess ) 
             { 
             fprintf ( stderr , "addKernel launch failed : %s\n" ,   cudaGetErrorString ( cudaStatus )); 
                 goto Error ; 
          }

    double total;
     for (int i = 0; i <= K; i++)

stop = clock();
time = clock()−start;
total=time/CLK_TCK;
printf(“GPU Time Taken: %f \n”,total);
for(int l = 0; l <= K; l++)
{
for(i = 0; i < rows; i++)

            { 
                for(int j = 0; j < rows; j++) 
                { 
                    printf("%f \t ", cc[(rows * i) + j]); 
                 }
             } 
         }
         Error: 
          // printf (" Exiting . . \n"); 
           cudaFree(d_c); 
           cudaFree(d_a);
           return cudaStatus;
           }

malloc and goto’s, oh my!