Calculating determinant of a (3x3) matrix with Cuda

user301 · December 30, 2021, 5:48am

I would like to implement the following algorithm :

Save all 3x3 matrices in 1 dimensional array
Every 9 elements form a matrix
Send the array to kernel
Every thread finds the determinant of 1 matrix

Here is the (incomplete) variant of a code that I tried to write:
Could you give me some advice on how can I correct the code to work according to the given task?
Thanks in advance!
Yoan
#include “cuda_runtime.h”
#include “device_launch_parameters.h”

#include <stdio.h>
//method for calculating the determinant of a matrix (3rd) → calculateMatrixWithCuda
cudaError_t calculateMatrixWithCuda(int *c, const int *a, const int *b, unsigned int size);

//method for sending to kernel
global void calculateDeterminantOfAMatrixKernel(int *c, const int *a, const int *b)
{
int i = threadIdx.x;
int y = threadIdy.y;
//writing the extensive logic here…

}

int main()
{
//setting up value of the array size, each matrix with dimension 3x3 → 45 elements in total
const int arraySize = 9*5;
//on the next lines - the matrices follow - 5 in total
const int matrix[arraySize] = {
//0 1 2 3 4 5 6 7 8
34,23,245,231,345,235,2,8,43,
33,990,48,84,38,384,23,40,4,
67,33,356,8,7,34,43,656,345,
8,12,65,45,567,78,65,67,8,
90,567,34,67,756,767,457,74,66
};
//array for storing the result of the operation
int c[arraySize/9] = { 0 };

// Add vectors in parallel.
cudaError_t cudaStatus = calculateMatrixWithCuda(c, a, b, arraySize);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "calculateDeterminantWithCuda failed!");
    return 1;
}

printf("det_matrices = %d}\n",
    c[0], c[1], c[2], c[3], c[4]);

// cudaDeviceReset must be called before exitъing in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaDeviceReset failed!");
    return 1;
}

return 0;

}

// Helper function for CUDA to add vectors in parallel.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
int *dev_a = 0;
int *dev_b = 0;
int *dev_c = 0;
cudaError_t cudaStatus;

// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
    goto Error;
}

// Allocate GPU buffers for three vectors (two input, one output)    .
cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed!");
    goto Error;
}

cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed!");
    goto Error;
}

cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed!");
    goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    goto Error;
}

cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    goto Error;
}

// Launch a kernel on the GPU with one thread for each element.
addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
    goto Error;
}

// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching calculateMatrixKernel!\n", cudaStatus);
    goto Error;
}

// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    goto Error;
}

//free the resources
Error:
cudaFree(dev_c);
cudaFree(dev_a);
cudaFree(dev_b);

return cudaStatus;

}

MarkusHoHo · January 17, 2022, 3:47pm

Hi @user301 ! Welcome to the NVIDIA developer forums!

Did you know that we have some dedicated CUDA forums? I redirect you to there to get you all the help you need.

Robert_Crovella · January 21, 2022, 1:35am

when posting code here, please properly format your code. Simple steps are to edit your post (click pencil icon below your post), then select all code, then click the </> button at top of edit window, then save your changes.

This post may be of interest.