CUDA error 700 ?? cudaDeviceSynchronize returned error code 700

d-r-x · October 29, 2019, 3:21pm

Hello

Im new programing in cuda and one of my first proyects give´s me this error:

cudaDeviceSynchronize returned error code 700 after launching mult! (mult is my global function´s name)

i´ve searched in some foros and it tells that is drivers problem but i´m not sure, if anyone could help me i really dont know how to solve it
Here is my code:
At the moment just works with square matrices

#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

global void mult(int **matriz2d1, int **matriz2d2, int **matriz2d3, int nfil, int ncol, int size) { // Realiza la multiplicacion asignando valores a la mariz 3
int temp = 0, c2;
int f = blockDim.y * blockIdx.y + threadIdx.y; //for (int f = 0; f <= fil - 1; f += 1) equivalencia en for
int c = blockDim.x * blockIdx.x + threadIdx.x; //for (int c = 0; c <= col - 1; c += 1)
if (f<nfil && c<ncol) {
//if (f*c < size){
for (c2 = 0; c2 <= ncol - 1; c2 += 1) {
temp += matriz2d1[f][c2] * matriz2d2[c2][c];
}
matriz2d3[f][c] = temp;
}
}

void impresion(int** matrizn, int nfil, int ncol) {//imprime la matriz
for (int f = 0; f <= nfil - 1; f += 1) {
for (int c = 0; c <= ncol - 1; c += 1) {
printf(“%d “, matrizn[f][c]);
}
printf(”\n”);
}
}

cudaError_t cargar(int **matriz2d1, int **matriz2d2, int **matriz2d3, int nfil, int ncol, int size) {// sube las matrices a los buses de los gpu
int **dev_a = nullptr;
int **dev_b = nullptr;
int **dev_c = nullptr;
dim3 num_fil(nfil / nfil);
dim3 num_col(nfil, ncol);
cudaError_t cudaStatus;

cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
	goto Error;
}

cudaStatus = cudaMalloc((void **)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaMalloc failed!");
	goto Error;
}
cudaStatus = cudaMalloc((void **)&dev_b, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaMalloc failed!");
	goto Error;
}
cudaStatus = cudaMalloc((void **)&dev_c, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaMalloc failed!");
	goto Error;
}

cudaStatus = cudaMemcpy(dev_a, matriz2d1, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaMemcpy failed!");
	goto Error;
}
cudaStatus = cudaMemcpy(dev_b, matriz2d2, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaMemcpy failed!");
	goto Error;
}

mult << < num_fil, num_col >> >(dev_a, dev_b, dev_c, nfil, ncol, size);
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
	goto Error;
}
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "\ncudaDeviceSynchronize returned error code %d after launching mult!\n\n", cudaStatus);
	goto Error;
}

cudaStatus = cudaMemcpy(matriz2d3, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
	fprintf(stderr, "cudaMemcpy failed!");
	goto Error;
}

Error:
cudaFree(dev_a);
cudaFree(dev_b);
cudaFree(dev_c);
return cudaStatus;
}

int main() {
int matriz2d1, matriz2d2, matriz2d3;
int nfil, ncol;
srand(time(NULL));
printf(“ingrese el numero de filas: “);
scanf(”%d”, &nfil);
printf(“ingrese el numero de columnas: “);
scanf(”%d”, &ncol);
while (nfil <= 0) {
printf(“el numero ingresado no puede ser menor o igual a cero ingrese otro: “);
scanf(”%d”, &nfil);
}
while (ncol <= 0) {
printf(“el numero ingresado no puede ser menor o igual a cero ingrese otro: “);
scanf(”%d”, &ncol);
}
int size = nfil * ncol;
//llenado de la matriz 1
matriz2d1 = (int)malloc(nfil * sizeof(int));
for (int i = 0; i <= nfil - 1; i++) {
matriz2d1[i] = (int)malloc(nfil * sizeof(int));
}
for (int f = 0; f <= nfil - 1; f += 1) {
for (int c = 0; c <= ncol - 1; c += 1) {
matriz2d1[f][c] = 1 + rand() % (11 - 1);
}
}
printf(“Datos Matriz 1: \n”); //imprime matriz1
impresion(matriz2d1, nfil, ncol);
//////llenado de la matriz 2//////////////////////////////////
matriz2d2 = (int)malloc(nfil * sizeof(int*));
for (int i = 0; i <= nfil - 1; i++) {
matriz2d2[i] = (int*)malloc(nfil * sizeof(int));
}
for (int f = 0; f <= nfil - 1; f += 1) {
for (int c = 0; c <= ncol - 1; c += 1) {
matriz2d2[f][c] = 1 + rand() % (11 - 1);
}
}
printf(“\nDatos Matriz 2: \n”); //imprime matriz2
impresion(matriz2d2, nfil, ncol);
//////inicializando 3/////////////////////////////////////////
matriz2d3 = (int**)malloc(nfil * sizeof(int*));
for (int i = 0; i <= nfil - 1; i++) {
matriz2d3[i] = (int*)malloc(nfil * sizeof(int));
}
for (int f = 0; f <= nfil - 1; f += 1) {
for (int c = 0; c <= ncol - 1; c += 1) {
matriz2d2[f][c] = 1 + rand() % (11 - 1);
}
}
printf(“\nDatos Matriz 3: \n”); //imprime matriz3
impresion(matriz2d3, nfil, ncol);
/////Llamada al kernel////////////////////////////////////////
cudaError_t cudaStatus = cargar(matriz2d1, matriz2d2, matriz2d3, nfil, ncol, size);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “mult failed!\n\n”);
return 1;
}
//cargar(matriz2d1, matriz2d2, matriz2d3, nfil, ncol, size); /Carga de archivos llenado de matriz 3/
printf(“\nDatos Matriz 3: \n”);
impresion(matriz2d3, nfil, ncol); //imprime la multiplicación
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, “cudaDeviceReset failed!”);
return 1;
}
free(matriz2d1);
free(matriz2d2);
free(matriz2d3);
return 0;
}

Robert_Crovella · October 29, 2019, 3:28pm

Error 700 is cudaErrorIllegalAddress

(you can discover this yourself by passing the error code through cudaGetErrorString())

It means your kernel is making an illegal, out-of-bounds access.

That is a defect in your code and needs to be debugged. It is not a problem with your GPU, driver, or CUDA setup.

If your kernel is taking a long time, the first thing you want to do is rule out the possibility of a WDDM TDR timeout (google that).

Once you have ruled that out, then debug your code to find out why you are making an illegal access. The method described here:

[url]cuda - Unspecified launch failure on Memcpy - Stack Overflow

may be a good starting point.

Robert_Crovella · October 29, 2019, 3:42pm

Your attempt to use double pointer (int **matriz2d1) is broken. To pick just one example:

taking a double pointer:

int **dev_a = nullptr;

and the address of it, creates a triple-pointer. Then casting that triple-pointer to a double-pointer:

cudaStatus = cudaMalloc((void **)&dev_a

is not going to give you the correct behavior as a C/C++ programmer. That is a broken programming strategy, not just in CUDA, but in any usage of C/C++.

Sorting this out is fairly complicated.

If you are a beginner, the general recommendation would be to flatten your arrays, and reference them using a single pointer.

You might wish to study the matrix multiply example given in the programming guide:

[url]Programming Guide :: CUDA Toolkit Documentation

You will see there that the matrix pointers are “flattened” and so access looks like this:

C.elements[row * C.width + col] = Cvalue;

instead of like this:

matriz2d3[f][c] = temp;

Topic		Replies	Views
cudaThreadSynchronize() error CUDA Programming and Performance	1	2963	October 5, 2009
unknown error reported when running matrixMul CUDA Programming and Performance	0	2519	June 4, 2008
Driver error: 700 CUDA Programming and Performance	0	2398	September 24, 2009
RNG, CUDA CUDA Programming and Performance	6	5030	August 4, 2009
error after cuCtxSynchronize CUDA Programming and Performance	0	3785	March 12, 2010
cudaDeviceSynchronize error CUDA Programming and Performance	2	3852	February 17, 2014
incomprehensible behaviour limitations on kernel calls for host function? CUDA Programming and Performance	12	7046	April 28, 2011
Unknown error in example on NVS 135M CUDA Programming and Performance	0	1312	June 8, 2008
Hello CUDA! program not working - please help CUDA Programming and Performance	2	1244	February 17, 2010
multiple kernel calls from one host function strange behaviour when calling kernel CUDA Programming and Performance	4	1551	April 21, 2011

CUDA error 700 ?? cudaDeviceSynchronize returned error code 700

Related topics