OpenCL and NVENC cannot run at the same time

These are my computer's settings:
RTX3070
arg

When I run the FFmpeg h264_nvenc encoder and OpenCL at the same time, the program blocks. This only happens with 32-bit applications; the same program works fine when built as 64-bit.
However, the 32-bit application does run normally on some other computers, for example one with the following GPU and driver:
RTX2060 with Max-Q design 30.0.14.9729

The OpenCL thread blocks inside clCreateContext. Reproduction code:

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include <CL/cl.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
#include <libavutil/avutil.h>
#include <libavutil/error.h>
#include <libavutil/frame.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include <libavutil/mastering_display_metadata.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
#include <libavcodec/bsf.h>
}

#pragma warning( disable : 4996 )

// Turns its (variadic) argument list into a single string literal via the
// preprocessor's stringification operator, so source text can be written
// inline without quoting every line.
#define KERNEL(...) #__VA_ARGS__

std::string getPlatformName(const cl_platform_id pid) {
    size_t param_value_size;
    clGetPlatformInfo(pid, CL_PLATFORM_NAME, 0, NULL, &param_value_size);
    char param_value[2048];
    clGetPlatformInfo(pid, CL_PLATFORM_NAME, param_value_size, param_value, NULL);
    std::string strdevicetype(param_value);
    return strdevicetype;
}

/**
 * Picks the OpenCL platform to run on.
 *
 * Enumerates all platforms and selects the last one whose name contains
 * "NVIDIA"; when no platform name matches, the first platform is used.
 *
 * @return The selected platform, or NULL when platform enumeration fails or
 *         reports no platforms.
 */
cl_platform_id getPlatFormIdx()
{
    cl_uint platformNum = 0;
    cl_int status = clGetPlatformIDs(0, NULL, &platformNum);
    std::cout << "platformNum = " << platformNum << std::endl;
    if (status != CL_SUCCESS || platformNum == 0) {
        // Without this guard the code below would index an empty array.
        std::cerr << "clGetPlatformIDs found no platform, status = " << status << std::endl;
        return NULL;
    }

    std::vector<cl_platform_id> platforms(platformNum);
    status = clGetPlatformIDs(platformNum, platforms.data(), NULL);
    if (status != CL_SUCCESS) {
        std::cerr << "clGetPlatformIDs failed, status = " << status << std::endl;
        return NULL;
    }

    // Default to the first platform; a later NVIDIA match overrides it.
    cl_uint independenrGPUIdx = 0;
    const std::string nvidia_string = "NVIDIA";
    for (cl_uint n_plat = 0; n_plat < platformNum; n_plat++) {
        const std::string plat_name = getPlatformName(platforms[n_plat]);
        if (plat_name.find(nvidia_string) != std::string::npos) {
            independenrGPUIdx = n_plat;
        }
    }

    std::cout << "NVIDIA independenrGPUIdx = " << independenrGPUIdx << std::endl;
    return platforms[independenrGPUIdx];
}

// OpenCL C source of the test kernel: copies a 4-byte-per-pixel image from
// imgSrc to imgDst, one work-item per pixel.
//   rows / cols : image height / width in pixels.
//   Work-item (dx, dy) handles column dx of row dy; items outside the image
//   do nothing.
// The pixel index is row-major, i.e. dy * cols + dx. Using `rows` as the
// stride (as an earlier version did) walks past the end of both buffers
// whenever rows > cols.
std::string programString = R"CLC(
__kernel void ocl_kernel_test(__global const unsigned char* imgSrc, int rows, int cols, __global unsigned char* imgDst)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);
    if (dx < cols && dy < rows)
    {
        int idx = dy * cols + dx;
        uchar4 rgba = vload4(0, imgSrc + 4 * idx);
        vstore4(rgba, 0, imgDst + 4 * idx);
    }
}
)CLC";
void opencl_test()
{
	cl_int status = 0;
	cl_uint platformnum = 0;
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue command_queue;
    cl_program program;
    std::cout << "into opencl_test" << std::endl;
    platform = getPlatFormIdx();
    cl_uint deviceNum = 0;
    std::cout << "00start clGetDeviceIDs" << std::endl;
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceNum);
    if (deviceNum < 1) {
        std::cout << "Device Num < 1 !"<< std::endl;
        return ;
    }
    std::cout << "00end  clGetDeviceIDs" << std::endl;

    std::cout << "11start clGetDeviceIDs" << std::endl;
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    std::cout << "11end  clGetDeviceIDs" << std::endl;
    int index = 0;
    cl_context_properties context_properties[11] = { 0 };
    context_properties[index++] = CL_CONTEXT_PLATFORM;
    context_properties[index++] = (cl_context_properties)platform;
    std::cout << "22start clCreateContext" << std::endl;
    //block here
    context = clCreateContext(context_properties, 1, &device, 
        [](const char* errinfo, const void* private_info, size_t cb, void* user_data) {
            std::cout << errinfo << std::endl;
        }, NULL, &status);
    std::cout << "22end  clCreateContext" << std::endl;
    std::cout << "33start clCreateCommandQueue" << std::endl;
    command_queue = clCreateCommandQueue(context, device, 0, &status);
    std::cout << "33end  clCreateCommandQueue" << std::endl;


    const char* programSource = programString.c_str();
    size_t programSize = programString.length();
    program = clCreateProgramWithSource(context, 1, (const char**)&programSource, &programSize, &status);
    std::string option = "-DT=float -DT4=float4 -DT8=float8 -DREAD_IMAGET=read_imagef -DWRITE_IMAGET=write_imagef -DCONVERT_T4=convert_float4";
    status = clBuildProgram(program, 1, &device, option.c_str(), NULL, NULL);
    cl_kernel kernel = clCreateKernel(program, "ocl_kernel_test", &status);

    const int width  = 480;
    const int height = 720;
    unsigned char* src_cpu = (unsigned char*)malloc(width * height * 4);
    unsigned char* dst_cpu = (unsigned char*)malloc(width * height * 4);
    memset(dst_cpu, 0, width * height * 4);
    for (int i = 0; i < width * height; i++) {
        src_cpu[4 * i] = 86;
        src_cpu[4 * i + 1] = 128;
        src_cpu[4 * i + 2] = 64;
        src_cpu[4 * i + 3] = 255;
    }
    cl_mem cl_input  = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * 4 * sizeof(unsigned char), NULL, &status);
    cl_mem cl_output = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * 4 * sizeof(unsigned char), NULL, &status);
    for (int i = 0; i < 10; i++) {
        clEnqueueWriteBuffer(command_queue, cl_input, CL_TRUE, 0, width * height * 4, src_cpu, 0, nullptr, nullptr);
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_input);
        status = clSetKernelArg(kernel, 1, sizeof(int), &height);
        status = clSetKernelArg(kernel, 2, sizeof(int), &width);
        status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &cl_output);
        size_t localSize[2]  = { size_t(16),size_t(16) };
        size_t globalSize[2] = { size_t((width)),size_t((height)) };
        status = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, globalSize, localSize, 0, NULL, NULL);
        clFinish(command_queue);
        clEnqueueReadBuffer(command_queue, cl_output, CL_TRUE, 0, width * height * 4, dst_cpu, 0, nullptr, nullptr);
    }

    for (int i = 1024; i < 1028; i++) {
        std::cout << "i = " << i << "  dst_val = " << (int)dst_cpu[i] << std::endl;
    }
    free(src_cpu);
    free(dst_cpu);
    clReleaseMemObject(cl_input);
    clReleaseMemObject(cl_output);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    std::cout << "end opencl_test" << std::endl;
}

/**
 * Submits one frame to the encoder and writes every packet it produces.
 *
 * @param enc_ctx  Opened encoder context.
 * @param frame    Frame to encode, or NULL to flush the encoder.
 * @param pkt      Scratch packet reused for each received packet.
 * @param outfile  Destination; packet payloads are appended as-is.
 *
 * Terminates the process with exit(1) if sending or receiving fails.
 */
static void encode(AVCodecContext* enc_ctx, AVFrame* frame, AVPacket* pkt,
    FILE* outfile)
{
    std::cout << "FFMpeg encode" << std::endl;

    /* hand the frame (or the flush request) to the encoder */
    if (avcodec_send_frame(enc_ctx, frame) < 0) {
        fprintf(stderr, "Error sending a frame for encoding\n");
        exit(1);
    }

    /* drain packets until the encoder wants more input or has finished */
    for (;;) {
        std::cout << "avcodec_receive_packet" << std::endl;
        const int rc = avcodec_receive_packet(enc_ctx, pkt);
        if (rc == AVERROR(EAGAIN) || rc == AVERROR_EOF) {
            return;
        }
        if (rc < 0) {
            fprintf(stderr, "Error during encoding\n");
            exit(1);
        }

        fwrite(pkt->data, 1, pkt->size, outfile);
        av_packet_unref(pkt);
    }
}

void ffmpeng_encode_test()
{
    const char* filename, * codec_name;
    const AVCodec* codec;
    AVCodecContext* c = NULL;
    int i, ret, x, y;
    FILE* f;
    AVFrame* frame;
    AVPacket* pkt;
    uint8_t endcode[] = { 0, 0, 1, 0xb7 };
    filename = "nvenc_video.mp4";//argv[1];
    codec_name = "h264_nvenc";//argv[2];

    /* find the mpeg1video encoder */
    std::cout << "avcodec_find_encoder_by_name" << std::endl;
    codec = avcodec_find_encoder_by_name(codec_name);
    if (!codec) {
        fprintf(stderr, "Codec '%s' not found\n", codec_name);
        exit(1);
    }

    std::cout << "avcodec_alloc_context3" << std::endl;
    c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }

    std::cout << "av_packet_alloc" << std::endl;
    pkt = av_packet_alloc();
    if (!pkt)
        exit(1);

    /* put sample parameters */
    c->bit_rate = 400000;
    /* resolution must be a multiple of two */
    c->width = 640;
    c->height = 960;
    /* frames per second */
    AVRational base;
    base.num = 1;
    base.den = 25;
    c->time_base = base;
    AVRational framerate;
    framerate.num = 25;
    framerate.den = 1;
    c->framerate = framerate;

    c->gop_size = 10;
    c->max_b_frames = 1;
    c->pix_fmt = AV_PIX_FMT_YUV420P;

    if (codec->id == AV_CODEC_ID_H264)
        av_opt_set(c->priv_data, "preset", "slow", 0);

    /* open it */
    std::cout << "avcodec_open2" << std::endl;
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        //printf(stderr, "Could not open codec: %s\n", av_err2str(ret));
        printf("Could not open codec\n");
        exit(1);
    }

    std::cout << " f = fopen" << std::endl;
    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "Could not open %s\n", filename);
        exit(1);
    }

    std::cout << "av_frame_alloc" << std::endl;
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->format = c->pix_fmt;
    frame->width = c->width;
    frame->height = c->height;

    std::cout << "av_frame_get_buffer" << std::endl;
    ret = av_frame_get_buffer(frame, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate the video frame data\n");
        exit(1);
    }

    /* encode 1 second of video */
    for (i = 0; i < 250; i++) {
        printf("---encode i = %d \n", i);
        fflush(stdout);
        ret = av_frame_make_writable(frame);
        if (ret < 0)
            exit(1);

         /* Y */
        for (y = 0; y < c->height; y++) {
            for (x = 0; x < c->width; x++) {
                frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
            }
        }

        /* Cb and Cr */
        for (y = 0; y < c->height / 2; y++) {
            for (x = 0; x < c->width / 2; x++) {
                frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
                frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
            }
        }

        frame->pts = i;

        /* encode the image */
        encode(c, frame, pkt, f);
    }

    /* flush the encoder */
    encode(c, NULL, pkt, f);

    if (codec->id == AV_CODEC_ID_MPEG1VIDEO || codec->id == AV_CODEC_ID_MPEG2VIDEO)
        fwrite(endcode, 1, sizeof(endcode), f);
    fclose(f);

    avcodec_free_context(&c);
    av_frame_free(&frame);
    av_packet_free(&pkt);
}

// Build-time switches selecting which scenario main() runs (1 = enabled):
//   RUN_NVENC_TEST      - encoder test alone, on the main thread
//   RUN_OCL_TEST        - OpenCL test alone, on the main thread
//   RUN_NVENC_OCL_TEST  - both tests, each on its own thread
#define RUN_NVENC_TEST 0
#define RUN_OCL_TEST   0
#define RUN_NVENC_OCL_TEST   1

// Prints a banner and the FFmpeg version, runs the enabled scenario(s), then
// prints the closing banner.
int main()
{
    std::cout << "*****start nvenc opencl test*****" << std::endl;
    std::cout << av_version_info() << std::endl;

#if RUN_NVENC_TEST
    ffmpeng_encode_test();
#endif

#if RUN_OCL_TEST
    opencl_test();
#endif

#if RUN_NVENC_OCL_TEST
    // Encoder thread is started first, OpenCL thread second; both are joined
    // in that same order.
    std::thread workers[] = { std::thread(ffmpeng_encode_test), std::thread(opencl_test) };
    for (std::thread& worker : workers) {
        worker.join();
    }
#endif

    std::cout << "*****end   nvenc opencl test*****" << std::endl;
    return 0;
}