I was following a tutorial from the below video, and when I attempt to build the project, I get the following output in the console.
1>------ Build started: Project: VectorAdd, Configuration: Debug Win32 ------
1> Compiling CUDA source file kernel.cu…
1>
1> C:\Users\Reid Lowdon\Documents\CUDA Projects\VectorAdd\VectorAdd>“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\nvcc.exe” -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2015 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -g -DWIN32 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler “/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MDd " -o Debug\kernel.cu.obj “C:\Users\Reid Lowdon\Documents\CUDA Projects\VectorAdd\VectorAdd\kernel.cu”
1>C:/Users/Reid Lowdon/Documents/CUDA Projects/VectorAdd/VectorAdd/kernel.cu(45): error : a global function call must be configured
1>CUDACOMPILE : nvcc warning : The ‘compute_20’, ‘sm_20’, and ‘sm_21’ architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
1> kernel.cu
1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V140\BuildCustomizations\CUDA 8.0.targets(687,9): error MSB3721: The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\nvcc.exe” -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2015 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include” -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -g -DWIN32 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler “/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MDd " -o Debug\kernel.cu.obj “C:\Users\Reid Lowdon\Documents\CUDA Projects\VectorAdd\VectorAdd\kernel.cu”” exited with code 2.
========== Build: 0 succeeded, 1 failed, 0 up-to-date, 0 skipped ==========
Here is my code:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#define SIZE 1024
__global__ void GPUVectorAdd(int *a, int *b, int *c, int n) {
int i = threadIdx.x;
if (i < n) {
c[i] = a[i] + b[i];
}
}
void VectorAdd(int *a, int *b, int *c, int n) {
int i;
for (i = 0; i < n; i++) {
c[i] = a[i] + b[i];
}
}
int main() {
int *a, *b, *c;
int *d_a, *d_b, *d_c;
cudaMalloc(&d_a, SIZE * sizeof(int));
cudaMalloc(&d_b, SIZE * sizeof(int));
cudaMalloc(&d_c, SIZE * sizeof(int));
for (int i = 0; i < SIZE; i++) {
a[i] = i;
b[i] = i;
c[i] = 0;
}
cudaMemcpy(d_a, a, SIZE * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_b, b, SIZE * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_c, c, SIZE * sizeof(int), cudaMemcpyHostToDevice);
GPUVectorAdd<<<1, SIZE>>>(d_a, d_b, d_c, SIZE);
GPUVectorAdd(a, b, c, SIZE);
cudaMemcpy(c, d_c, SIZE * sizeof(int), cudaMemcpyDeviceToHost);
for (int i = 0; i < 10; i++) {
printf("c[%d] = %d\n", i, c[i]);
}
free(a);
free(b);
free(c);
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_c);
return (0);
}