My kernel returns default values. Does it not not like my types?

What I am trying to do is the armature skinning routine like this vertex shader would. So that I would have a physical representation of my mesh. This is the first kernel that I have written on my own. I would also like to ask how much of this can be reused?

#version 430 core

layout(location = 0) in vec3 pos;
layout(location = 1) in vec3 norm;
layout(location = 2) in vec2 tex;
layout(location = 5) in ivec4 boneIds; 
layout(location = 6) in vec4 weights;
	
uniform mat4 projection;
uniform mat4 view;
uniform mat4 model;
	
const int MAX_BONES = 100;
const int MAX_BONE_INFLUENCE = 4;
uniform mat4 finalBonesMatrices[MAX_BONES];
	
out vec2 TexCoords;
	
void main()
{
    vec4 totalPosition = vec4(0.0f);
    for(int i = 0 ; i < MAX_BONE_INFLUENCE ; i++)
    {
        if(boneIds[i] == -1) 
            continue;
        if(boneIds[i] >=MAX_BONES) 
        {
            totalPosition = vec4(pos,1.0f);
            break;
        }
        vec4 localPosition = finalBonesMatrices[boneIds[i]] * vec4(pos,1.0f);
        totalPosition += localPosition * weights[i];
        vec3 localNormal = mat3(finalBonesMatrices[boneIds[i]]) * norm;
    }
		
    mat4 viewModel = view * model;
    gl_Position =  projection * viewModel * totalPosition;
    TexCoords = tex;
}#version 430 core

layout(location = 0) in vec3 pos;
layout(location = 1) in vec3 norm;
layout(location = 2) in vec2 tex;
layout(location = 5) in ivec4 boneIds; 
layout(location = 6) in vec4 weights;
	
uniform mat4 projection;
uniform mat4 view;
uniform mat4 model;
	
const int MAX_BONES = 100;
const int MAX_BONE_INFLUENCE = 4;
uniform mat4 finalBonesMatrices[MAX_BONES];
	
out vec2 TexCoords;
	
void main()
{
    vec4 totalPosition = vec4(0.0f);
    for(int i = 0 ; i < MAX_BONE_INFLUENCE ; i++)
    {
        if(boneIds[i] == -1) 
            continue;
        if(boneIds[i] >=MAX_BONES) 
        {
            totalPosition = vec4(pos,1.0f);
            break;
        }
        vec4 localPosition = finalBonesMatrices[boneIds[i]] * vec4(pos,1.0f);
        totalPosition += localPosition * weights[i];
        vec3 localNormal = mat3(finalBonesMatrices[boneIds[i]]) * norm;
    }
		
    mat4 viewModel = view * model;
    gl_Position =  projection * viewModel * totalPosition;
    TexCoords = tex;
}

This is my kernel.h

#pragma once

struct Matrix {
	float val[4][4];
};

struct Vector3 {
	float val[3];
};

struct Vector4 {
	float val[4];
};

struct IVector4 {
	int val[4];
};

using namespace std;

#include <vector>
#include "glm/glm.hpp"
#include "animvert.h"

void setLayout(vector<AnimVertex> verts, glm::mat4 scale);

void CalcPositions(vector<glm::mat4> boneMats, glm::mat4 model, glm::vec4* h_result);

this is my kernel.cu

#include "kernel.h"

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>



__constant__ Matrix d_model;

__constant__ Matrix d_uscale;

__constant__ Matrix* d_boneMats;

__device__ Matrix MatMul(Matrix a, Matrix b) {
    Matrix ret;
    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            float sum = 0;
            for (int k = 0; k < 4; k++) {
                sum += a.val[j][k] * b.val[k][i];
            }
            ret.val[j][i] = sum;
        }
    }
    return ret;
}

__device__ Vector4 MatMul(Matrix a, Vector4 b) {
    Vector4 ret;
    for (int i = 0; i < 4; i++) {
        float sum = 0;
        for (int j = 0; j < 4; j++) {
            sum += a.val[j][i] * b.val[j];
        }
        ret.val[i] = sum;
    }
    return ret;
}

__global__ void doSkinning(IVector4* d_boneIds, Vector4* d_weights, Vector3* inpos, Vector4* outpos, int n)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid < n) {
        bool nhasdef = true;
        Vector4 position;
        for (int i = 0; i < 4; i++) position.val[i] = 0;
        int id;
        Vector4 lpos;
        for (int i = 0; i < 4; i++) {
            Vector4 filled;
            for (int j = 0; j < 3; j++) filled.val[j] = inpos[tid].val[j];
            filled.val[3] = 1;
            if (d_boneIds[tid].val[i] >= 0) {
                nhasdef = false;
                id = d_boneIds[tid].val[i];
                lpos = MatMul(d_boneMats[id], filled);
                for (int j = 0; j < 4; j++) position.val[j] += d_weights[tid].val[i] * lpos.val[j];
                
            }
        }
        if (nhasdef) {
            Vector4 filled;
            for (int j = 0; j < 3; j++) filled.val[j] = inpos[tid].val[j];
            filled.val[3] = 1;
            outpos[tid] = MatMul(d_model, filled);
            
        }else{
            Matrix res = MatMul(d_model, d_uscale);
            outpos[tid] = MatMul(res, position);
        }
    }
}

int num = 0;
IVector4* myBoneIds;
Vector4* myWeights;
Vector3* myPositions;

Matrix myUscale;


void setLayout(vector<AnimVertex> verts, glm::mat4 scale) {
    num = verts.size();
    myBoneIds = new IVector4[num];
    myWeights = new Vector4[num];
    myPositions = new Vector3[num];
    for (int i = 0; i < num; i++) {
        for (int j = 0; j < 4; j++) {
            if (j < 3) myPositions[i].val[j] = verts.at(i).position[j];
            myBoneIds[i].val[j] = verts.at(i).m_BoneIDs[j];
            myWeights[i].val[j] = verts.at(i).m_Weights[j];
        }
    }
    
    for (int i = 0; i < 4;i++) for (int j =0;j<4;j++) myUscale.val[i][j] = scale[i][j];
}

void CalcPositions(vector<glm::mat4> boneMats, glm::mat4 model, glm::vec4 *f_result)
{
    size_t matSize = boneMats.size() * sizeof(Matrix);
    size_t idSize = num * sizeof(IVector4);
    size_t whaSize = num * sizeof(Vector4);
    size_t posSize = num * sizeof(Vector3);
    size_t resSize = num * sizeof(glm::vec4);

    IVector4 *d_boneIds;
    Vector4 *d_weights;
    Vector3 *d_positions;
    Vector4 *d_result;

    vector<Matrix> pboneMats;
    for (int i = 0; i < boneMats.size(); i++) {
        Matrix mat;
        for (int j = 0; j < 4; j++) {
            for (int k = 0; k < 4; k++) mat.val[j][k] = boneMats.at(i)[j][k];
        }
        pboneMats.push_back(mat);
    }

    Vector4* h_result = (Vector4*)malloc(whaSize);
    f_result = (glm::vec4*)malloc(resSize);

    cudaMalloc(&d_boneIds, idSize);
    cudaMalloc(&d_weights, whaSize);
    cudaMalloc(&d_positions, posSize);
    cudaMalloc(&d_result, whaSize);

    cudaMemcpy(d_boneIds, myBoneIds, idSize, cudaMemcpyHostToDevice);
    cudaMemcpy(d_weights, myWeights, whaSize, cudaMemcpyHostToDevice);
    cudaMemcpy(d_positions, myPositions, posSize, cudaMemcpyHostToDevice);

    cudaMemcpyToSymbol(d_boneMats, &pboneMats, matSize);
    cudaMemcpyToSymbol(&d_uscale, &myUscale, sizeof(Matrix));
    cudaMemcpyToSymbol(&d_model, &model, sizeof(Matrix));

    int THREADS = 256;
    int GRID = (num + THREADS - 1) / THREADS;

    doSkinning<<<GRID, THREADS>>>(d_boneIds, d_weights, d_positions, d_result, num);

    cudaMemcpy(h_result, d_result, whaSize, cudaMemcpyDeviceToHost);

    for (int i = 0; i < num; i++) {
        
        for (int j = 0; j < 4; j++) {
            f_result[i][j] = h_result[i].val[j];
            
        }
        
    }

    cudaFree(d_result);
    cudaFree(d_positions);
    cudaFree(d_weights);
    cudaFree(d_boneIds);
}

start by implementing proper CUDA error checking (google that, take the first hit, apply it to your code).

Are any errors reported?