Hey! I am trying to allocate two ptr to the L2 cache, but from the time spending result…Seems the later one will cover the previous one?? Would you provide some help? Thank you!!!
This is the official link
cudaStream_t stream;
cudaStreamCreate(&stream); // Create CUDA stream
cudaStreamAttrValue stream_attribute; // Stream level attributes data structure
stream_attribute.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(dev_b); // Global Memory data pointer
stream_attribute.accessPolicyWindow.num_bytes = k * n * sizeof(float); // Number of bytes for persistence access.
// (Must be less than cudaDeviceProp::accessPolicyMaxWindowSize)
stream_attribute.accessPolicyWindow.hitRatio = 1; // Hint for cache hit ratio
stream_attribute.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting; // Type of access property on cache hit
stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; // Type of access property on cache miss.
stream_attribute.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(dev_wei2); // Global Memory data pointer
stream_attribute.accessPolicyWindow.num_bytes = rank2 * n * sizeof(float); // Number of bytes for persistence access.
// (Must be less than cudaDeviceProp::accessPolicyMaxWindowSize)
stream_attribute.accessPolicyWindow.hitRatio = 1; // Hint for cache hit ratio
stream_attribute.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting; // Type of access property on cache hit
stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; // Type of access property on cache miss.
//Set the attributes to a CUDA stream of type cudaStream_t
cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &stream_attribute);
Another try is this, also failed…
cudaStream_t stream;
cudaStreamCreate(&stream); // Create CUDA stream
cudaStreamAttrValue stream_attribute; // Stream level attributes data structure
stream_attribute.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(dev_b); // Global Memory data pointer
stream_attribute.accessPolicyWindow.num_bytes = k * n * sizeof(float); // Number of bytes for persistence access.
// (Must be less than cudaDeviceProp::accessPolicyMaxWindowSize)
stream_attribute.accessPolicyWindow.hitRatio = 1; // Hint for cache hit ratio
stream_attribute.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting; // Type of access property on cache hit
stream_attribute.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; // Type of access property on cache miss.
cudaStreamAttrValue stream_attribute1; // Stream level attributes data structure
stream_attribute1.accessPolicyWindow.base_ptr = reinterpret_cast<void*>(dev_wei2); // Global Memory data pointer
stream_attribute1.accessPolicyWindow.num_bytes = rank2 * n * sizeof(float); // Number of bytes for persistence access.
// (Must be less than cudaDeviceProp::accessPolicyMaxWindowSize)
stream_attribute1.accessPolicyWindow.hitRatio = 1; // Hint for cache hit ratio
stream_attribute1.accessPolicyWindow.hitProp = cudaAccessPropertyPersisting; // Type of access property on cache hit
stream_attribute1.accessPolicyWindow.missProp = cudaAccessPropertyStreaming; // Type of access property on cache miss.
//Set the attributes to a CUDA stream of type cudaStream_t
cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &stream_attribute);
cudaStreamSetAttribute(stream, cudaStreamAttributeAccessPolicyWindow, &stream_attribute1);