Hello,
I have a simple piece of CUDA code, and I don't know what is going on: whenever I try to debug it or just run it, it always gives this error:
aaa.exe has triggered a breakpoint.
It says it stopped on an exception: "An exception was caught by the debugger, and user settings indicate that break should occur."
I have tried checking and unchecking several entries in the Exception Settings window, but nothing seems to help.
It happens any time the program reaches a CUDA call; in this code it breaks at line 54, the kernel launch.
I am not sure whether this is a bug or something I am doing wrong. I have tried Visual Studio Pro 2015, 2017, and 2019 with CUDA 10.0 and CUDA 10.1, in every combination, but I still get the same error.
I am working on Windows 10.0.17763 (Build 17763)
I actually worked on more complicated code a few months ago with no problems, but now this simple thing makes no sense to me.
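In case it helps narrow things down, this is the kind of error checking I can wrap around each CUDA call while debugging (just a minimal sketch; the CUDA_CHECK macro is only a debugging helper and is not part of the code below):

#include <cstdio>
#include <cstdlib>
#include "cuda_runtime.h"

// Debugging helper: print the CUDA error string and abort if a runtime call fails.
#define CUDA_CHECK(call)                                              \
    do {                                                              \
        cudaError_t err = (call);                                     \
        if (err != cudaSuccess) {                                     \
            fprintf(stderr, "CUDA error '%s' at %s:%d\n",             \
                    cudaGetErrorString(err), __FILE__, __LINE__);     \
            exit(EXIT_FAILURE);                                       \
        }                                                             \
    } while (0)

// Usage around the failing region:
// CUDA_CHECK(cudaMalloc((void**)&dev_a, size1));
// kernel_func<<<1, 1>>>(dev_a, dev_x, N, dim);
// CUDA_CHECK(cudaGetLastError());       // reports kernel launch errors
// CUDA_CHECK(cudaDeviceSynchronize());  // reports errors raised inside the kernel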
Help, please.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
// Helper function that allocates device memory, copies the matrix to the GPU, and launches the kernel.
void cuda_code(float* a, float* x, int N, int dim);
__global__ void kernel_func(float* a, float* x, int N, int dim)
{
    int i = threadIdx.x;
    printf("Hello from thread %d \n", i);
    __syncthreads();  // needs parentheses; without them the statement does nothing
    //c[i] = a[i] + b[i];
}
void readm(float* m, int n);
void printm(float *m, float*x, int n);
int main() {
    float* m;
    float* x;
    int dim;
    int N = 10;
    dim = (N + 1) * N;
    // malloc takes a size in bytes, so allocate dim and N floats, not dim and N bytes.
    m = (float*)malloc(dim * sizeof(float));
    x = (float*)malloc(N * sizeof(float));
    readm(m, N);
    cuda_code(m, x, N, dim);
    cudaDeviceReset();
    free(m);
    free(x);
    return 0;
}
void cuda_code(float* a, float* x, int N, int dim)
{
    size_t size1 = dim * sizeof(float);
    size_t size2 = N * sizeof(float);
    float* dev_a;
    float* dev_x;
    // Allocate GPU buffers for the matrix and the result vector.
    cudaMalloc((void**)&dev_a, size1);
    cudaMalloc((void**)&dev_x, size2);
    // Copy the input matrix from host memory to the GPU buffer.
    cudaMemcpy(dev_a, a, size1, cudaMemcpyHostToDevice);
    //cudaMemcpy(dev_x, x, size2, cudaMemcpyHostToDevice);
    // Launch the kernel on the GPU with a single block of one thread.
    kernel_func<<<1, 1>>>(dev_a, dev_x, N, dim);
    cudaDeviceSynchronize();
    // Copy the output vector from the GPU buffer back to host memory.
    cudaMemcpy(x, dev_x, size2, cudaMemcpyDeviceToHost);
    cudaFree(dev_a);
    cudaFree(dev_x);
}
// Read an n x (n + 1) matrix from matrix.txt, one row at a time.
void readm(float* m, int n) {
    using namespace std;
    ifstream myfile;
    myfile.open("matrix.txt");
    if (myfile.is_open()) {
        for (int j = 0; j < n; j++)
        {
            for (int i = 0; i < (n + 1); i++)
            {
                int ij = i + ((n + 1) * j);
                myfile >> m[ij];
            }
        }
    }
    myfile.close();
}
void printm(float* m, float* x, int n) {
    std::ofstream myfile;
    myfile.open("matrix2.txt");
    if (myfile.is_open()) {
        for (int j = 0; j < n; j++)
        {
            for (int i = 0; i < n + 1; i++)
            {
                myfile << m[i + (n + 1) * j] << "\t";
            }
            myfile << "\n";
        }
        for (int j = 0; j < n; j++)
        {
            myfile << x[j] << "\n";
        }
    }
    myfile.close();
}
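For reference, matrix.txt is read by readm as N rows of N + 1 whitespace-separated floats (so 10 rows of 11 values for N = 10); a smaller example with N = 2 would look something like this:

1.0 2.0 3.0
4.0 5.0 6.0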