Dynamic 2d gpu array coping back to host

hey guys I’m pretty new to cuda
I expect this code to copy the first row from gpu into my host pointer
I’m looking for a way to copy a 2d dynamic gpu array back to host array
ps : i already know if i use static array i will be fine so consider the fact that i have dynamic array
thank you
also I get an exception when i try to allocate the a[0]

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <iostream>
__global__ void kernel(int** a) {
	a[0]=(int*)malloc( sizeof(int) * 100);
	a[0][0] = 100;
	a[0][1] = 11;
	printf("%d\n", a[0][0]);
__global__ void printAfter(int* a) {
	printf("%d\n", a[1]);
int main() {
	int** a;
	cudaMallocManaged(&a, sizeof(int*) * 10);
	//cudaMalloc(&a[0], sizeof(int));
	kernel<< <1, 1 >> > (a);
	int* aa  =new int[10];
	cudaMallocHost(&aa, sizeof(int) * 14);
	int** c;
	cudaMallocHost(&c, sizeof(int*) * 100);
	cudaMemcpy(c, a, sizeof(int*)*10, cudaMemcpyDeviceToHost);
	printAfter << <1, 1 >> > (c[0]);
	cudaMemcpy(aa, c[0], sizeof(int) ,cudaMemcpyDeviceToHost);
	printf("%d", aa[1]);

It’s invalid to use kernel malloc buffers on the host.
From the programming guide:
In addition, memory allocated by a call to malloc() or __nv_aligned_device_malloc() in device code cannot be used in any runtime or driver API calls (i.e. cudaMemcpy, cudaMemset, etc).