Calculating the determinant of a 3x3 matrix with CUDA

I would like to implement the following algorithm:

  1. Save all 3x3 matrices in a one-dimensional array
  2. Every 9 consecutive elements form one matrix
  3. Send the array to the kernel
  4. Each thread computes the determinant of one matrix (see the sketch after this list)
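
As an illustration of step 4, this is roughly the per-matrix logic I have in mind (a rough sketch only, assuming row-major storage with 9 consecutive elements per matrix and one thread per matrix; the kernel and parameter names are just placeholders):

__global__ void det3x3Kernel(int *det, const int *matrices, int numMatrices)
{
    int m = threadIdx.x;                    // one thread per 3x3 matrix
    if (m < numMatrices) {
        const int *a = &matrices[m * 9];    // first element of this thread's matrix
        // det = a0*(a4*a8 - a5*a7) - a1*(a3*a8 - a5*a6) + a2*(a3*a7 - a4*a6)
        det[m] = a[0] * (a[4] * a[8] - a[5] * a[7])
               - a[1] * (a[3] * a[8] - a[5] * a[6])
               + a[2] * (a[3] * a[7] - a[4] * a[6]);
    }
}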

Here is the (incomplete) version of the code I have tried to write so far.
Could you give me some advice on how to correct it so that it works according to the task above?
Thanks in advance!
Yoan
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
//host helper for calculating the determinants of the 3x3 matrices → calculateMatrixWithCuda
cudaError_t calculateMatrixWithCuda(int *c, const int *a, unsigned int size);

//kernel: each thread computes the determinant of one matrix
__global__ void calculateDeterminantOfAMatrixKernel(int *c, const int *a)
{
int i = threadIdx.x;   //index of the 3x3 matrix this thread handles
//writing the extensive logic here…

}

int main()
{
//size of the array: 5 matrices of dimension 3x3 → 45 elements in total
const int arraySize = 9 * 5;
//the 5 matrices follow, one per line, 9 elements each
const int matrix[arraySize] = {
//0 1 2 3 4 5 6 7 8
34,23,245,231,345,235,2,8,43,
33,990,48,84,38,384,23,40,4,
67,33,356,8,7,34,43,656,345,
8,12,65,45,567,78,65,67,8,
90,567,34,67,756,767,457,74,66
};
//array for storing the result of the operation
int c[arraySize/9] = { 0 };

// Compute the determinants in parallel.
cudaError_t cudaStatus = calculateMatrixWithCuda(c, matrix, arraySize);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "calculateDeterminantWithCuda failed!");
    return 1;
}

printf("det_matrices = %d}\n",
    c[0], c[1], c[2], c[3], c[4]);

// cudaDeviceReset must be called before exiting in order for profiling and
// tracing tools such as Nsight and Visual Profiler to show complete traces.
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaDeviceReset failed!");
    return 1;
}

return 0;

}

// Helper function that uses CUDA to compute the determinants in parallel.
cudaError_t calculateMatrixWithCuda(int *c, const int *a, unsigned int size)
{
int *dev_a = 0;
int *dev_c = 0;
cudaError_t cudaStatus;

// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
    goto Error;
}

// Allocate GPU buffers: one for the input matrices, one for the output determinants.
cudaStatus = cudaMalloc((void**)&dev_c, (size / 9) * sizeof(int));
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed!");
    goto Error;
}

cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed!");
    goto Error;
}

cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed!");
    goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    goto Error;
}

cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    goto Error;
}

// Launch the kernel on the GPU with one thread for each 3x3 matrix.
calculateDeterminantOfAMatrixKernel<<<1, size / 9>>>(dev_c, dev_a);

// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
    goto Error;
}

// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching calculateMatrixKernel!\n", cudaStatus);
    goto Error;
}

// Copy the determinants from the GPU buffer back to host memory.
cudaStatus = cudaMemcpy(c, dev_c, (size / 9) * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    goto Error;
}

//free the resources
Error:
cudaFree(dev_c);
cudaFree(dev_a);

return cudaStatus;

}
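
In case the number of matrices ever grows beyond what fits in a single block, I assume the launch and indexing would be generalized roughly like this (just a sketch under that assumption; the kernel would then also need the matrix count as an extra parameter for the bounds check):

// Hypothetical multi-block launch: 256 threads per block, enough blocks to cover all matrices.
int numMatrices = size / 9;
int threadsPerBlock = 256;
int blocks = (numMatrices + threadsPerBlock - 1) / threadsPerBlock;
calculateDeterminantOfAMatrixKernel<<<blocks, threadsPerBlock>>>(dev_c, dev_a);

// Inside the kernel, the matrix index would then be:
//   int i = blockIdx.x * blockDim.x + threadIdx.x;
//   if (i >= numMatrices) return;   // skip the extra threads in the last block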

Hi @user301! Welcome to the NVIDIA developer forums!

Did you know that we have dedicated CUDA forums? I will move your post there so you can get all the help you need.

When posting code here, please format it properly. The simple steps are: edit your post (click the pencil icon below it), select all of the code, click the </> button at the top of the edit window, then save your changes.

This post may be of interest.