When I try to allocate pinned memory on the Jetson Orin platform, I see a huge increase in GPU memory usage. Why?
Code that allocates about 4 GB of pinned memory:
from typing import List, Tuple

import torch
def _allocate_kv_cache(
) -> List[torch.Tensor]:
"""Allocates KV cache on the specified device."""
kv_cache_shape = (4681, 2, 16, 4, 128)
pin_memory = True
kv_cache: List[torch.Tensor] = []
for _ in range(28):
# null block in CpuGpuBlockAllocator requires at least that
# block to be zeroed-out.
# We zero-out everything for simplicity.
kv_cache.append(
torch.zeros(kv_cache_shape,
dtype=torch.float16,
pin_memory=pin_memory,
device="cpu"))
return kv_cache
# Run the allocation when the script executes; the returned list of
# tensors is not bound to any name.
_allocate_kv_cache()
Memory usage after allocating:
Why does this happen?