Compile Warning when initializing cuda/pipeline

I am getting the following warning when compiling my code, which uses a dual-stage pipeline:

warning #20054-D: dynamic initialization is not supported for a function-scope static __shared__ variable within a __device__/__global__ function
      __attribute__((shared)) cuda::pipeline_shared_state<cuda::thread_scope::thread_scope_block, 2> pipeline_state;

My function is as follows,

#include <cooperative_groups.h>
#include <cooperative_groups/memcpy_async.h>
#include <cuda/pipeline> // only supported on sm_70 and above

namespace cg = cooperative_groups;
#define NUM_STAGES 2 // number of stages in the pipeline

// Two-stage producer/consumer pipeline: prefetches chunks of global1/global2
// into shared memory with cuda::memcpy_async, then combines them back into
// global memory. Requires SM70+ (cuda::pipeline); async copies are hardware-
// accelerated on SM80+.
//
// Launch preconditions:
//   - 1-D thread block; `count` chunks of group.size() elements each.
//   - dynamic shared memory >= NUM_STAGES * 2 * blockDim.x * sizeof(int) bytes.
__global__ void pipeline_example(int *global1, int *global2, size_t count) {
    extern __shared__ int s[]; // size controlled by the host launch config
    cg::thread_block group = cg::this_thread_block();

    int *shared[NUM_STAGES] = {s, s + 2 * group.size()}; // each stage processes 2 global chunks

    // Create a pipeline shared state.
    //
    // NOTE: cuda::pipeline_shared_state has a non-trivial constructor, so
    // declaring it as a function-scope __shared__ variable triggers warning
    // #20054-D ("dynamic initialization is not supported for a function-scope
    // static __shared__ variable"). The warning is benign here: the compiler
    // skips the constructor, and cuda::make_pipeline performs the actual
    // initialization of the shared state. The workaround documented by libcu++
    // is to suppress the diagnostic explicitly:
#pragma nv_diag_suppress static_var_with_dynamic_init
    __shared__ cuda::pipeline_shared_state<cuda::thread_scope::thread_scope_block, NUM_STAGES> pipeline_state;
    auto pipeline = cuda::make_pipeline(group, &pipeline_state);

    size_t fetch, subset;
    for (subset = fetch = 0; subset < count; ++subset) {
        // Producer side: keep up to NUM_STAGES chunks in flight ahead of the consumer.
        for (; fetch < subset + NUM_STAGES && fetch < count; ++fetch) {
            // Fetch one chunk from each input array into this stage's shared buffer.
            pipeline.producer_acquire();
            cuda::memcpy_async(group, shared[fetch % NUM_STAGES], 
                            global1 + fetch * group.size(), sizeof(int) * group.size(), pipeline);
            cuda::memcpy_async(group, shared[fetch % NUM_STAGES] + group.size(), 
                            global2 + fetch * group.size(), sizeof(int) * group.size(), pipeline);
            pipeline.producer_commit();
        }
        pipeline.consumer_wait(); // wait for the oldest in-flight stage to arrive
        int *data = shared[subset % NUM_STAGES];
        // Consume: write sum and product of the paired chunks back to global memory.
        global1[subset * group.size() + group.thread_rank()] = data[group.thread_rank()] + data[group.thread_rank() + group.size()];
        global2[subset * group.size() + group.thread_rank()] = data[group.thread_rank()] * data[group.thread_rank() + group.size()];
        pipeline.consumer_release(); // recycle the stage for the producer loop
    }

}

I am running in WSL2 with CUDA 12.1. The output is correct, so I am wondering what the warning means and how to work around it.