CUDA with multiple GPUs: allocating and freeing page-locked memory

I have two GPUs and I allocate page-locked memory for each of them. Here is my code.

// Repeatedly allocates and frees portable page-locked (pinned) host memory
// while different devices are current, to observe how pinned memory is
// accounted per GPU.
//
// NOTE: cudaHostAlloc allocates *host* (CPU) RAM, not device memory. The
// allocation itself is device-independent; cudaSetDevice only selects which
// device's context the mapping is created in (with cudaHostAllocPortable the
// buffer is usable from every context). cudaFreeHost releases the host
// allocation regardless of which device is current at free time.

// Fail fast on any CUDA runtime error instead of silently discarding the
// status — an unchecked cudaErrorMemoryAllocation here would otherwise only
// surface much later as a mysterious failure.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            std::abort();                                                     \
        }                                                                     \
    } while (0)

// One test allocation: 2048 * 1000 * 100 ints ≈ 819 MB.
// sizeof() is evaluated first, so the whole product is computed in size_t
// and cannot overflow a 32-bit int.
constexpr size_t kAllocBytes = sizeof(int) * 2048 * 1000 * 100;

CUDA_CHECK(cudaSetDevice(1));
int* a = nullptr;
for (int i = 0; i < 5; ++i) {   // same alloc/free cycle the original repeated 5x
    CUDA_CHECK(cudaHostAlloc(&a, kAllocBytes, cudaHostAllocPortable));
    CUDA_CHECK(cudaFreeHost(a));
    a = nullptr;                // avoid a dangling pointer between iterations
}

CUDA_CHECK(cudaSetDevice(0));
int* b = nullptr;
for (int i = 0; i < 3; ++i) {   // original repeated this pair 3x
    CUDA_CHECK(cudaHostAlloc(&b, kAllocBytes, cudaHostAllocPortable));
    CUDA_CHECK(cudaFreeHost(b));
    b = nullptr;
}

Everything before the `cudaSetDevice(0)` call behaves as expected. But after `cudaSetDevice(0)` followed by `cudaHostAlloc`, GPU1's page-locked memory increases normally — and GPU0's page-locked memory increases as well. Why?

I also tried running the GPU0 allocate/free cycle in one thread and the GPU1 cycle in another thread, and the same situation occurs.

If I need to use page-locked memory with each GPU, how should I write the code?
To restate: after the GPU0 code executes and GPU1 calls `cudaHostAlloc`, GPU0's pinned memory also increases. When I call `cudaFreeHost(b)`, only GPU1's pinned memory is released — GPU0's is not. If I keep running, GPU0's pinned memory eventually fills up and GPU1's next `cudaHostAlloc` fails with `cudaErrorMemoryAllocation`. So I am stuck: how can I allocate pinned memory for each GPU so that it does not keep growing, or so that it can actually be freed?
Environment: CUDA 10.2, Visual Studio 2019.
Thanks.