Please provide the following info (tick the boxes after creating this topic):
Software Version
[*] DRIVE OS 6.0.6
DRIVE OS 6.0.5
DRIVE OS 6.0.4 (rev. 1)
DRIVE OS 6.0.4 SDK
other
Target Operating System
[*] Linux
QNX
other
Hardware Platform
[*] DRIVE AGX Orin Developer Kit (940-63710-0010-300)
DRIVE AGX Orin Developer Kit (940-63710-0010-200)
DRIVE AGX Orin Developer Kit (940-63710-0010-100)
DRIVE AGX Orin Developer Kit (940-63710-0010-D00)
DRIVE AGX Orin Developer Kit (940-63710-0010-C00)
DRIVE AGX Orin Developer Kit (not sure its number)
other
SDK Manager Version
[*] 1.9.2.10884
other
Host Machine Version
native Ubuntu Linux 20.04 Host installed with SDK Manager
[*] native Ubuntu Linux 20.04 Host installed with DRIVE OS Docker Containers
native Ubuntu Linux 18.04 Host installed with DRIVE OS Docker Containers
other
I cannot get two kernels to be submitted asynchronously on two CUDA streams — how can I do that?
My STM schedule-description YAML is:
# STM schedule description.
# NOTE(review): YAML is indentation-sensitive, and the nesting here appears to
# have been flattened by the forum paste — restore the original indentation
# (Clients under the hyperepoch, Runnables under the client's epoch, etc.)
# before compiling the schedule. TODO confirm against the STM YAML schema.
---
Version: 3.0.0
gpu_multistream_multiprocess:
Identifier: 101
# Global resources for the schedule: two CPU cores and the integrated GPU.
Resources:
CPU:
- CPU0
- CPU1
GPU:
- iGPU
Hyperepochs:
- hyperepoch0:
Epochs:
- epoch0:
Period: 100ms
Clients:
# Single client process owning two CUDA stream resources, both on the iGPU.
- clientGpuX:
Resources:
CUDA_STREAM:
- CUDA_STREAMX: iGPU
- CUDA_STREAMY: iGPU
Epochs:
- hyperepoch0.epoch0:
Runnables:
# submit1 is the CPU-side submitter that enqueues kernel1; it claims a CPU
# and a CUDA_STREAM resource. NOTE(review): WCET is declared as 10ms, but the
# posted submit1() sleeps for 1s — the declaration and the code disagree.
- submit1:
WCET: 10ms
Dependencies: []
Submits: clientGpuX.kernel1
Resources:
- CPU
- CUDA_STREAM
# kernel1 is the GPU-side work submitted by submit1.
- kernel1:
WCET: 1ms
Dependencies: []
Resources:
- GPU
# submit2/kernel2 mirror submit1/kernel1; both submitters have empty
# dependency lists, so STM is free to run them concurrently on the two streams.
- submit2:
WCET: 10ms
Dependencies: []
Submits: clientGpuX.kernel2
Resources:
- CPU
- CUDA_STREAM
- kernel2:
WCET: 1ms
Dependencies: []
Resources:
- GPU
My client code is:
// CUDA streams backing the CUDA_STREAMX / CUDA_STREAMY resources declared for
// clientGpuX in the schedule YAML (registered with STM in main()).
cudaStream_t m_streamx, m_streamy;
// Timing events — declared but never recorded or queried in this snippet.
cudaEvent_t start, stop;
// Host/device canary buffers — unused in this snippet; presumably left over
// from an earlier validation experiment. TODO confirm they can be removed.
int* canaryHost;
int* canaryDevice;
//static int testVal;
// STM submitter runnable for kernel1: enqueues testKernel1 on the CUDA stream
// that the STM scheduler passes in (one of the client's CUDA_STREAM resources).
// The launch is asynchronous, so this function must return as soon as the work
// is queued — it must NOT block.
void submit1(void* params, cudaStream_t stream)
{
    (void)params;
    testKernel1(stream);
    fprintf(stderr, "In submit1 start\n");
    // FIX: removed the sleep(1) that was here. Blocking inside a submitter
    // stalls the STM client thread for the full second, blows the 10ms WCET
    // declared for this runnable in the YAML, and serializes submit1/submit2 —
    // which is exactly why the two kernels were not being submitted
    // asynchronously on their two streams.
    fprintf(stderr, "In submit1 end\n");
}
/* STM submitter runnable for kernel2.
 *
 * Queues testKernel2 onto the stream handed in by the STM scheduler and then
 * logs that the submission happened. Returns immediately after enqueueing. */
void submit2(void* params, cudaStream_t stream)
{
    (void)params; /* unused runnable payload */

    testKernel2(stream);

    fprintf(stderr, "In submit2\n");
}
/* Trivial CPU runnable kept around for scheduler bring-up: it only logs that
 * it was dispatched. (Its registration in main() is currently commented out.) */
void test1(void* params)
{
    (void)params; /* unused runnable payload */
    fputs("In test 1\n", stderr);
}
/* Companion bring-up runnable to test1: logs its dispatch and nothing else. */
void test2(void* params)
{
    (void)params; /* unused runnable payload */
    fputs("In test 2\n", stderr);
}
// Entry point for the STM client "clientGpuX".
// Creates the two non-blocking CUDA streams that back the CUDA_STREAMX /
// CUDA_STREAMY resources from the schedule YAML, registers the submitter
// runnables and stream resources with STM, then hands control to the STM
// scheduler until the schedule terminates.
int main(int argc, const char** argv)
{
    (void)argc;
    (void)argv;

    // FIX: do not wrap the stream-creation calls in assert(). With NDEBUG
    // defined, the entire assert expression is compiled out, so the streams
    // would never be created at all in release builds. Check explicitly.
    if (cudaStreamCreateWithFlags(&m_streamx, cudaStreamNonBlocking) != cudaSuccess) {
        fprintf(stderr, "Failed to create stream X\n");
        return 1;
    }
    if (cudaStreamCreateWithFlags(&m_streamy, cudaStreamNonBlocking) != cudaSuccess) {
        fprintf(stderr, "Failed to create stream Y\n");
        return 1;
    }

    stmClientInit("clientGpuX"); // Needs to be called before registration
    // stmRegisterCpuRunnable(test1, "test1", NULL);
    stmRegisterCudaSubmitter(submit1, "submit1", NULL);
    stmRegisterCudaSubmitter(submit2, "submit2", NULL);
    // Register all resources in the workload. The names must match the
    // CUDA_STREAM entries declared for this client in the schedule YAML.
    stmRegisterCudaResource("CUDA_STREAMX", m_streamx);
    stmRegisterCudaResource("CUDA_STREAMY", m_streamy);

    stmEnterScheduler(); // Blocks until the schedule finishes.

    // Drain any GPU work still queued on either stream before teardown.
    cudaDeviceSynchronize();
    stmClientExit(); // Removes all STM data structures. Can't use STM calls anymore after this.

    // The streams are owned by this process, not by STM — release them.
    cudaStreamDestroy(m_streamx);
    cudaStreamDestroy(m_streamy);
    return 0;
}