How to fix Error MSB3721

Im on Windows 10x64bit machine, CUDA 8.0, Geforce 610M(which is still supported), Visual Studio 2015(which integrated with CUDA)

I tried to compile Device Query and VectorAdd, it’s work just fine.
Then I created my own mergesort code, these error came up:

Error MSB3721 The command ““D:\Apps Skripsi\CUDA v8.0\v8.0\bin\nvcc.exe” -dlink -o x64\Debug\MergesortCUDA.device-link.obj -Xcompiler “/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -L"D:\Apps Skripsi\CUDA v8.0\v8.0\lib\x64” cudart.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib -gencode=arch=compute_21,code=sm_21 -G --machine 64 x64\Debug\kernel.cu.obj” exited with code 1. MergesortCUDA C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V140\BuildCustomizations\CUDA 8.0.targets 773

So tried to copy paste my own mergesort code in to VectorAdd.cu, need to check if the linker is the problem, but those error still came up,. How to fix this? thanks

MSB3721 is the VS way of saying “I ran nvcc, and it returned an error code.”

Other than knowing that your compilation failed, it is completely useless for understanding why it failed.

To understand why, it’s necessary to increase the verbosity of VS output so that it shows the actual invocation of nvcc and the actual error reported by nvcc (prior to VS reporting the MSB3721 error).

If you google how to increase verbosity of VS output, you’ll be able to find articles explaining how.

Thanks for your advice.
I did set the verbose to detailed, and yes, there is so much build output come out, such as platform toolset it got fix right after I edit the .vcxproj, then there is something else:

nvcc : unsupported gpu architecture “compute_21”

On this link c++ - Cuda Unsupported gpu architecture Eclipse Nsight - Stack Overflow
it said there is such thing called compute_21 so I changed then Code generation in Properties but the error come out >200 or so. I googled in CUDA - Wikipedia, it written that in CUDA 8.0 CC 2.0 - 6.x still supported. So what posibbly wrong, sir? thanks.

There is no such thing as compute_21

There is sm_21, but no compute_21

SO you should specify

compute_20,sm_21

Sorry I mean there is no such thing.
I tried that (compute_20, sm_21) in code generation in visual studio, but still got the MSB3721, and so many >200 errors, funny thing is I changed the build to x86 some error reduce. Btw I used Visual Studio Enterprise 2015 is that any effect? Or any another advices? thanks man.

1 Like

Here my mergesort code:

#include <stdio.h>
#include <conio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <stdint.h>
#include <string.h>
#include "cuda.h"
#include "device_launch_parameters.h"
#include "cuda_runtime.h"
#include "cuda_device_runtime_api.h"

__device__ void merge(size_t *a, size_t *aux, int lo, int mid, float hi);

__global__ void mergeBU(size_t *a, size_t *aux, int N)
{

	for (int sz = 1; sz < N; sz = sz + sz)
	{
		for (int lo = 2 * sz*threadIdx.x; lo < N - sz; lo += sz + sz)
		{
			int mid = lo + sz - 1;
			float hi = fminf(lo + sz + sz - 1, N - 1);
			merge(a, aux, lo, mid, hi);

		}
	}
}

__device__ void merge(size_t *a, size_t *aux, int lo, int mid, int hi)
{
	int i = lo;
	int j = mid + 1;

	for (int k = lo; k <= hi; k++)
	{
		aux[k] = a[k];
	}

	for (int k = lo; k <= hi; k++)
	{
		if (i > mid) { a[k] = aux[j]; j++; }
		else if (j > hi) { a[k] = aux[i]; i++; }
		else if (aux[j] < aux[i]) { a[k] = aux[j]; j++; }
		else { a[k] = aux[i]; i++; }
	}
}


int main(int argc, char **argv)
{
	int gridDim = 8;
	int blockDim = 192;
	int N;
	size_t *a, *aux;
	size_t *d_a, *d_aux;
	clock_t t;
	printf("Masukan jumlah data : ");
	scanf_s("%d", &N);

	a = (size_t *)malloc(N * sizeof(size_t));
	aux = (size_t *)malloc(N * sizeof(size_t));

	cudaMalloc(&d_a, N * sizeof(size_t));
	cudaMalloc(&d_aux, N * sizeof(size_t));

	for (int i = 0; i < N; i++)
	{
		a[i] = rand()%100;
		//printf("%d ", a[i]);
	}

	cudaMemcpy(d_a, a, N * sizeof(size_t), cudaMemcpyHostToDevice);
	cudaMemcpy(d_aux, aux, N * sizeof(size_t), cudaMemcpyHostToDevice);

	t = clock();
	mergeBU << <gridDim, blockDim >> > (d_a, d_aux, N);
	t = clock() - t;

	cudaMemcpy(a, d_a, N * sizeof(size_t), cudaMemcpyDeviceToHost);

	for (int i = 0; i < N; i++) { printf("%d ", a[i]); }
	double time_taken = ((double)t) / CLOCKS_PER_SEC;

	printf("\n");
	printf("time = %f secs\n", time_taken);
	free(a);
	free(aux);
	cudaFree(d_a);
	cudaFree(d_aux);
	_getch();
	return 0;
}
__device__ void merge(size_t *a, size_t *aux, int lo, int mid, int hi)

I changed from int hi to float hi,
and change int gridDim, blockDim to dim3 gridDim,blockDim
The MSB3721 error gone.
shame to me actually.

But there is error : LNK2019, but then I add cudart.lib in the configuration Properties - linker - input. And build succeeded. Cheers man! thanks

Wait a minute!

#include <stdio.h>
#include <conio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <stdint.h>
#include <string.h>
#include <cuda.h>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cuda_device_runtime_api.h>

__device__ void merge(size_t *a, size_t *aux, int lo, int mid, float hi);

__global__ void mergeBU(size_t *a, size_t *aux, int N)
{

	for (int sz = 1; sz < N; sz = sz + sz)
	{
		for (int lo = 2 * sz*threadIdx.x; lo < N - sz; lo += sz + sz)
		{
			int mid = lo + sz - 1;
			float hi = fminf(lo + sz + sz - 1, N - 1);
			merge(a, aux, lo, mid, hi);

		}
	}
}

__device__ void merge(size_t *a, size_t *aux, int lo, int mid, float hi)
{
	int i = lo;
	int j = mid + 1;

	for (int k = lo; k <= hi; k++)
	{
		aux[k] = a[k];
	}

	for (int k = lo; k <= hi; k++)
	{
		if (i > mid) { a[k] = aux[j]; j++; }
		else if (j > hi) { a[k] = aux[i]; i++; }
		else if (aux[j] < aux[i]) { a[k] = aux[j]; j++; }
		else { a[k] = aux[i]; i++; }
	}
}

unsigned int randr(unsigned int min, unsigned int max)
{
	double scaled = (double)rand() / RAND_MAX;

	return (max - min + 1)*scaled + min;
}

int main(void)
{
	dim3 gridDim = 8;
	dim3 blockDim = 192;
	int N;
	size_t *a, *aux;
	size_t *d_a, *d_aux;
	clock_t t;
	printf("Masukan jumlah data : ");
	scanf_s("%d", &N);

	a = (size_t *)malloc(N * sizeof(size_t));
	aux = (size_t *)malloc(N * sizeof(size_t));

	cudaMalloc(&d_a, N * sizeof(size_t));
	cudaMalloc(&d_aux, N * sizeof(size_t));

	for (int i = 0; i < N; i++)
	{
		a[i] = randr(1,10000);
		//printf("%d ", a[i]);
	}

	cudaMemcpy(d_a, a, N * sizeof(size_t), cudaMemcpyHostToDevice);
	cudaMemcpy(d_aux, aux, N * sizeof(size_t), cudaMemcpyHostToDevice);

	t = clock();
	mergeBU <<<gridDim, blockDim >>> (d_a, d_aux, N);
	t = clock() - t;

	cudaMemcpy(a, d_a, N * sizeof(size_t), cudaMemcpyDeviceToHost);

	for (int i = 0; i < N; i++) { printf("%d ", a[i]); }
	double time_taken = ((double)t) / CLOCKS_PER_SEC;

	printf("\n");
	printf("time = %f secs\n", time_taken);
	free(a);
	free(aux);
	cudaFree(d_a);
	cudaFree(d_aux);
	_getch();
	return 0;
}

Those code build succesfully, then I tried to compiled using Nsight, but the GPU stat is grey coloured, like is not been used. What I’m asking is those code is actually running on GPU? im not sure really.

TO simplyfy my quetion: what the difference between when I click on Local windows debugger and Nsight-start CUDA debugger?

Hello, I meet this problem, too. How can I solve it? Thanks.

1>C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V120\BuildCustomizations\CUDA 7.0.targets(593,9): error MSB3721: 命令““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\bin\nvcc.exe” -gencode=arch=compute_35,code="sm_35,compute_35" --use-local-env --cl-version 2013 -ccbin “C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_amd64” -rdc=true -IInclude\cutil\inc -I"D:\Programs\BundleFusion-master\FriedLiver" -ISource\DXUT\Optional -ISource\DXUT\Core -IInclude\Uplink -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\include" --keep-dir x64\Debug -maxrregcount=62 --machine 64 --compile -cudart static -g -DWIN32 -D_DEBUG -DDEBUG -DPROFILE -D_CONSOLE -DD3DXFX_LARGEADDRESS_HANDLE -D_CRT_SECURE_NO_WARNINGS -DNOMINMAX -D_UNICODE -DUNICODE -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o x64\Debug\CUDACache.cu.obj “D:\Programs\BundleFusion-master\FriedLiver\Source\CUDACache.cu””已退出,返回代码为 2。
1>已完成执行任务“CudaCompile”的操作 - 失败。
1>已完成在项目“FriedLiver.vcxproj”中生成目标“CudaBuild”的操作 - 失败。

I just got a latest update of VS 2019 Community edition, we are using C++.
Our project used to build earlier. Now we are getting
MSB3721 error:

Severity Code Description Project File Line Suppression State
Error MSB3721 The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe” -gencode=arch=compute_30,code="sm_30,compute_30" --use-local-env -ccbin “C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29910\bin\HostX86\x64” -x cu -I"C:\Dev\OctoplusNetra-devcuda\packages\libtiff-msvc-x64.4.0.7.8807\build\native../…//build/native/include” -I"C:\Dev\OctoplusNetra-devcuda\packages\sqlite3_c_plus_plus.1.0.3\build\native../…//build/native/include/" -I"C:\Program Files (x86)\National Instruments\Shared\ExternalCompilerSupport\C\include" -I. -I…......\include -I…......\Classes\Basic -I…......\Classes\Gui -I…......\externals\CorLib -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" -I"C:\ProgramData\NVIDIA Corporation\CUDA Samples\v10.2\2_Graphics\volumeRender2.2Upload\vcpkg-master\installed\x64-windows\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -DHAS_libtiff -DWIN32 -D_DEBUG -D_WINDOWS -DWINVER=0x0501 -D_WIN32_WINNT=0x0501 -D_ITERATOR_DEBUG_LEVEL=0 -D_LIB -DHAVE_SNPRINTF -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS -D_VC80_UPGRADE=0x0710 -D_UNICODE -DUNICODE -D_AFXDLL -Xcompiler “/EHsc /W4 /nologo /Od /Fd.\Debug64\ /FS /Zi /MDd " -o Debug64\cuda_ProcKernels.cu.obj “C:\Dev\OctoplusNetra-devcuda\cuda_ProcKernels.cu”” exited with code 255. GrabDemo C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 10.2.targets 764
Error MSB3721 The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\bin\nvcc.exe” -gencode=arch=compute_30,code="sm_30,compute_30" --use-local-env -ccbin “C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.28.29910\bin\HostX86\x64” -x cu -I"C:\Dev\OctoplusNetra-devcuda\packages\libtiff-msvc-x64.4.0.7.8807\build\native../…//build/native/include” -I"C:\Dev\OctoplusNetra-devcuda\packages\sqlite3_c_plus_plus.1.0.3\build\native../…//build/native/include/" -I"C:\Program Files (x86)\National Instruments\Shared\ExternalCompilerSupport\C\include" -I. -I…......\include -I…......\Classes\Basic -I…......\Classes\Gui -I…......\externals\CorLib -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" -I"C:\ProgramData\NVIDIA Corporation\CUDA Samples\v10.2\2_Graphics\volumeRender2.2Upload\vcpkg-master\installed\x64-windows\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -DHAS_libtiff -DWIN32 -D_DEBUG -D_WINDOWS -DWINVER=0x0501 -D_WIN32_WINNT=0x0501 -D_ITERATOR_DEBUG_LEVEL=0 -D_LIB -DHAVE_SNPRINTF -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS -D_VC80_UPGRADE=0x0710 -D_UNICODE -DUNICODE -D_AFXDLL -Xcompiler “/EHsc /W4 /nologo /Od /Fd.\Debug64\ /FS /Zi /MDd " -o Debug64\cuda_FilterKernels.cu.obj “C:\Dev\OctoplusNetra-devcuda\cuda_FilterKernels.cu”” exited with code 255. GrabDemo C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 10.2.targets 764

Could someone please help?

Our code was successfully built by visual studio enterprise 2019 16.9.x, however after updated to visual studio enterprise 2019 16.10.x. The same MSB3721 error happened. could anyone help? the detailed build info is as follows:

Build started…
1>------ Build started: Project: libCommon, Configuration: Release x64 ------
1>Build started 6/22/2021 9:23:30 PM.
1>Target InitializeBuildStatus:
1> Touching “x64\Release\libCommon.tlog\unsuccessfulbuild”.
1>Target AddCudaCompileDeps:
1> Skipping target “AddCudaCompileDeps” because all output files are up-to-date with respect to the input files.
1>Target AddCudaCompilePropsDeps:
1> Skipping target “AddCudaCompilePropsDeps” because all output files are up-to-date with respect to the input files.
1>Target CudaBuild:
1> Target CudaBuildCore:
1> Compiling CUDA source file GPUArrUtilityFunc.cu…
1>
1> D:\XRM_TFS\ReconTrunk\Recon\libCommon>“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin\nvcc.exe” -gencode=arch=compute_30,code="sm_30,compute_30" -gencode=arch=compute_35,code="sm_35,compute_35" -gencode=arch=compute_50,code="sm_50,compute_50" -gencode=arch=compute_52,code="sm_52,compute_52" -gencode=arch=compute_60,code="sm_60,compute_60" -gencode=arch=compute_61,code="sm_61,compute_61" -gencode=arch=compute_75,code="sm_75,compute_75" --use-local-env -ccbin “C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.29.30037\bin\HostX86\x64” -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include" --keep-dir x64\Release -use_fast_math -maxrregcount=0 --ptxas-options=-v --machine 64 --compile -cudart shared --default-stream per-thread -DWIN32 -DJSONCPP_DISABLE_DLL_INTERFACE_WARNING -DNDEBUG -D_WINDOWS -D_USRDLL -DLIBCOMMON_EXPORTS -D_WINDLL -D_UNICODE -DUNICODE -Xcompiler “/EHsc /W3 /nologo /O2 /Fdx64\Release\vc142.pdb /FS /MD " -o x64\Release\GPUArrUtilityFunc.cu.obj “D:\XRM_TFS\ReconTrunk\Recon\libCommon\GPUArrUtilityFunc.cu” -Xcompiler /openmp
1> nvcc fatal : Unsupported gpu architecture ‘compute_30’
1> C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 11.3.targets(785,9): error MSB3721: The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin\nvcc.exe” -gencode=arch=compute_30,code="sm_30,compute_30" -gencode=arch=compute_35,code="sm_35,compute_35" -gencode=arch=compute_50,code="sm_50,compute_50" -gencode=arch=compute_52,code="sm_52,compute_52" -gencode=arch=compute_60,code="sm_60,compute_60" -gencode=arch=compute_61,code="sm_61,compute_61" -gencode=arch=compute_75,code="sm_75,compute_75" --use-local-env -ccbin “C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.29.30037\bin\HostX86\x64” -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include” --keep-dir x64\Release -use_fast_math -maxrregcount=0 --ptxas-options=-v --machine 64 --compile -cudart shared --default-stream per-thread -DWIN32 -DJSONCPP_DISABLE_DLL_INTERFACE_WARNING -DNDEBUG -D_WINDOWS -D_USRDLL -DLIBCOMMON_EXPORTS -D_WINDLL -D_UNICODE -DUNICODE -Xcompiler “/EHsc /W3 /nologo /O2 /Fdx64\Release\vc142.pdb /FS /MD " -o x64\Release\GPUArrUtilityFunc.cu.obj “D:\XRM_TFS\ReconTrunk\Recon\libCommon\GPUArrUtilityFunc.cu” -Xcompiler /openmp” exited with code 1.
1> Done building target “CudaBuildCore” in project “libCommon.vcxproj” – FAILED.
1>
1> Done building project “libCommon.vcxproj” – FAILED.
1>
1>Build FAILED.
1>
1>C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\v160\BuildCustomizations\CUDA 11.3.targets(785,9): error MSB3721: The command ““C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\bin\nvcc.exe” -gencode=arch=compute_30,code="sm_30,compute_30" -gencode=arch=compute_35,code="sm_35,compute_35" -gencode=arch=compute_50,code="sm_50,compute_50" -gencode=arch=compute_52,code="sm_52,compute_52" -gencode=arch=compute_60,code="sm_60,compute_60" -gencode=arch=compute_61,code="sm_61,compute_61" -gencode=arch=compute_75,code="sm_75,compute_75" --use-local-env -ccbin “C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Tools\MSVC\14.29.30037\bin\HostX86\x64” -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include” -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.3\include" --keep-dir x64\Release -use_fast_math -maxrregcount=0 --ptxas-options=-v --machine 64 --compile -cudart shared --default-stream per-thread -DWIN32 -DJSONCPP_DISABLE_DLL_INTERFACE_WARNING -DNDEBUG -D_WINDOWS -D_USRDLL -DLIBCOMMON_EXPORTS -D_WINDLL -D_UNICODE -DUNICODE -Xcompiler “/EHsc /W3 /nologo /O2 /Fdx64\Release\vc142.pdb /FS /MD " -o x64\Release\GPUArrUtilityFunc.cu.obj “D:\XRM_TFS\ReconTrunk\Recon\libCommon\GPUArrUtilityFunc.cu” -Xcompiler /openmp” exited with code 1.
1> 0 Warning(s)
1> 1 Error(s)
1>
1>Time Elapsed 00:00:00.87
========== Build: 0 succeeded, 1 failed, 0 up-to-date, 0 skipped ==========

after remove architecture ‘compute_30’, it built successfully

Check if your CMAKE for the build is in release mode. Then you need to change the project to release mode instead of debug. That worked for me.

how to remove the ‘compute_30’?
which file it is ?
thank you

Did you find the file name?

You Can Check the CMakeList.txt file for this issue.