#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "device_functions.h" #include "../common/gpudirect/trace.h" #include "./common.h" #include "common.c" #include #include "cuda_gl_interop.h" #include #include #include "cuda_texture_types.h" #include #include using namespace std; using namespace cv; Mat test_cuda_function(unsigned char *tmp_cuda, uint32_t height, uint32_t width); void test_cudaArray(int width, int height, cudaArray* devArray); #ifdef __linux__ #pragma GCC diagnostic ignored "-Wwrite-strings" #endif #define PHX_CHECK(call) { \ etStat eStat = call; \ if (PHX_OK != eStat) { \ stringstream sstrMsg; \ sstrMsg << "Error in PHX function, line " \ << __LINE__; \ throw runtime_error(sstrMsg.str()); \ } \ } #define CUDA_CHECK(call) { \ CUresult eStatus = call; \ if (CUDA_SUCCESS != eStatus) { \ stringstream sstrMsg; \ sstrMsg << "Cuda failed with status " \ << "0x" << hex << eStatus \ << " in line " \ << dec << __LINE__; \ throw runtime_error(sstrMsg.str()); \ } \ } #define BUFFER_CHECK(call) { \ etBufferStat eStatus = call; \ if (BUFFER_STAT_OK != eStatus) { \ stringstream sstrMsg; \ sstrMsg << "Buffer error 0x " \ << hex << eStatus \ << " in line " \ << dec << __LINE__; \ throw runtime_error(sstrMsg.str()); \ } \ } /* phx_callback() * PHX callback function. Handles the acquisition interrupt events. */ static void phx_callback( tHandle hCamera, /* Camera handle */ ui32 dwInterruptMask, /* Interrupt mask */ void *pvParams /* Pointer to user supplied context */ ) { (void) hCamera; if (PHX_INTRPT_BUFFER_READY & dwInterruptMask) { /* New buffer has been acquired and is ready to be processed */ (*(int*) pvParams)++; } } void AllocCudaBuffers( CUdeviceptr *pBuffers, unsigned int uNumBuffers, uint32_t dwWidth, uint32_t dwHeight, uint32_t dwNumChannels, int nBytesPerChannel ) { if (!pBuffers) { throw runtime_error("Invalid parameter - null pointer."); } for (unsigned int i = 0; i < uNumBuffers; i++) { CUDA_CHECK(cuMemAlloc(&pBuffers[i], dwWidth * dwHeight * dwNumChannels * nBytesPerChannel)); } } void FreeCudaBuffers( CUdeviceptr *pBuffers, unsigned int uNumBuffers ) { for (unsigned int i = 0; i < uNumBuffers; i++) { if (pBuffers[i]) { CUDA_CHECK(cuMemFree(pBuffers[i])); } } } /* InitAcqBuf() * Initialise the acquisition buffers used by the PHX library. */ stImageBuff * InitAcqBuf( ui32 dwAcqNumImages, CUdeviceptr *pcudaInput, tHandle hCamera ) { if (!pcudaInput) { throw runtime_error("Invalid parameter - null pointer."); } stImageBuff *pImageBuff = new stImageBuff[dwAcqNumImages + 1]; uint32_t dw = 0; /* Create and initialise the buffers that will be used for GPU DMA */ for (; dw < dwAcqNumImages; ++dw) { pImageBuff[dw].pvAddress = (void*) pcudaInput[dw]; } /* Zero terminate the array of buffers */ pImageBuff[dw].pvAddress = 0; pImageBuff[dw].pvContext = 0; return pImageBuff; } /* RenderCUDABuffers() * Process CUDA buffers here. This function currently performs a data copy on the device. */ void RenderCUDABuffers( CUdeviceptr pSrc, CUdeviceptr pDst, uint32_t dwWidth, uint32_t dwHeight, uint32_t dwNumChannels, int nBytesPerChannel ) { CUDA_MEMCPY2D cpy; cpy.srcXInBytes = cpy.dstXInBytes = 0*4; cpy.srcY = cpy.dstY = 0; cpy.srcPitch = dwWidth * dwNumChannels * nBytesPerChannel; cpy.dstPitch = dwWidth * dwNumChannels * nBytesPerChannel; cpy.Height = dwHeight; cpy.WidthInBytes = dwWidth * dwNumChannels * nBytesPerChannel; cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE; cpy.srcDevice = (CUdeviceptr)pSrc; cpy.dstDevice = (CUdeviceptr)pDst; cpy.dstMemoryType = CU_MEMORYTYPE_DEVICE; CUDA_CHECK(cuMemcpy2DAsync(&cpy, 0)); } CUdeviceptr GetNextOutputCudaBuffer( CUdeviceptr *cudaBuffers, uint32_t dwCudaBuffersCount ) { if (!cudaBuffers) { throw runtime_error("Invalid parameter - null pointer."); } static uint32_t dwPointer = 0; CUdeviceptr Buffer = cudaBuffers[dwPointer]; // printf("cudaBuffers[dwPointer] = %x, dwPointer = %x\n", dwPointer, cudaBuffers[dwPointer]); ++dwPointer; if (dwPointer >= dwCudaBuffersCount) dwPointer = 0; return Buffer; } cudaGraphicsResource_t cudaResource[1]; GLuint textureID[1]; cudaArray* devArray; #define WINDOW_WIDTH 2592 #define WINDOW_HEIGHT 2048 PFNGLBINDBUFFERARBPROC glBindBuffer = NULL; PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL; PFNGLGENBUFFERSARBPROC glGenBuffers = NULL; PFNGLBUFFERDATAARBPROC glBufferData = NULL; #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) inline void __checkCudaErrors( CUresult err, const char *file, const int line ) { if( CUDA_SUCCESS != err) { fprintf(stderr, "CUDA Driver API error = %04d from file <%s>, line %i.\n", err, file, line ); exit(-1); } } int tmp_counter = 0; void myDisplay(void) { glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // glDrawPixels(WINDOW_WIDTH, WINDOW_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBegin(GL_QUADS); //纹理的坐标和四边形顶点的对应，可以通过设置四边形的位置调整图像在窗体的位置 glTexCoord2f(0.0, 0.0); glVertex3f(-1.0, 1.0, 0.0); glTexCoord2f(0.0, 1.0); glVertex3f(-1.0, -1.0, 0.0); glTexCoord2f(1.0, 1.0); glVertex3f(1.0, -1.0, 0.0); glTexCoord2f(1.0, 0.0); glVertex3f(1.0, 1.0, 0.0); glEnd(); glFlush(); glutSwapBuffers(); glutPostRedisplay(); _PHX_SleepMs(1); } CUdevice device; CUfunction function_test; CUmodule module; char *kernel_name = (char*) "cuda_run_driver_kernel_test"; char *module_file = (char*) "test_cuda_tmp.ptx"; CUcontext context; char *module_name_draw_sample = (char*) "cudaGLKernel"; uchar4* devPtr; uchar4* devPtrTemp; CUdeviceptr *cudaBuffersOutput = 0; uint8_t* cudaBufferData; const uint32_t dwNumChannels = 1; tCxpRegisters sCameraRegs = {0}; tPhxCmd sPhxCmd; tHandle hCamera; ui32 dwBufferWidth; ui32 dwBufferHeight; ui32 dwAcqNumImages; ui32 dwBufDstXLength; etParamValue eParamValue; volatile int nCurrentEventCount = 0; int nLastEventCount = 0; CUarray_format cudaArrayFormat; CUarray *cudaArraysInput = 0; CUdeviceptr *cudaBuffersInput = 0; CUarray *cudaArraysOutput = 0; stImageBuff *pImageBuff = 0; bool fErrorOccurred = false; void drawImage(uchar4* devPtr, CUdeviceptr* cudaBuffersOutput, int width, int height) { void *args[4] = { &devPtr, cudaBuffersOutput, &width, &height }; dim3 grids(WINDOW_WIDTH/16, WINDOW_HEIGHT/16); dim3 threads(16, 16); cuLaunchKernel(function_test, grids.x, grids.y, 1, // Nx1x1 blocks threads.x, threads.y, 1, // 1x1x1 threads 0, 0, args, 0); } void launchKernelInit() { cudaMalloc((void **)&devPtr, WINDOW_WIDTH * WINDOW_HEIGHT * sizeof(uchar4)); CUresult err = cuInit(0); err = cuCtxCreate(&context, 0, device); if (err != CUDA_SUCCESS) { fprintf(stderr, "* Error initializing the CUDA context.\n"); cuCtxDetach(context); exit(-1); } } void launchKernel() { CUresult err = cuInit(0); err = cuModuleLoad(&module, module_file); if (err != CUDA_SUCCESS) { fprintf(stderr, "* Error loading the module %s\n", module_file); cuCtxDetach(context); exit(-1); } err = cuModuleGetFunction(&function_test, module, module_name_draw_sample); if (err != CUDA_SUCCESS) { fprintf(stderr, "* Error getting kernel function %s\n", module_name_draw_sample); cuCtxDetach(context); exit(-1); } drawImage(devPtr, cudaBuffersOutput, WINDOW_WIDTH, WINDOW_HEIGHT); cudaMemcpyToArray(devArray, 0, 0, (uchar4*)devPtr, WINDOW_WIDTH * WINDOW_HEIGHT * sizeof(uchar4), cudaMemcpyDeviceToDevice); } void opengl_init(int argc, char *argv[]) { glutInit(&argc, argv); glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA); glutInitWindowSize(WINDOW_WIDTH, WINDOW_HEIGHT); glutInitWindowPosition(1200, 200); glutCreateWindow("imgshow"); glEnable(GL_TEXTURE_2D);//开启2d上下文，告诉OpenGL要绘制的是2d的纹理 glGenTextures(1, textureID);//生成1个2d上下文对象 glBindTexture(GL_TEXTURE_2D, textureID[0]);//绑定2d上下文 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);//设置2d上下文对象的插值方式 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, WINDOW_WIDTH, WINDOW_HEIGHT, 0, GL_BGR_EXT, GL_UNSIGNED_BYTE, NULL); cudaError_t err = cudaGraphicsGLRegisterImage(&cudaResource[0], textureID[0], GL_TEXTURE_2D, cudaGraphicsRegisterFlagsNone); if (err != cudaSuccess) { std::cout << "cudaGraphicsGLRegisterImage: " << err << "Line: " << __LINE__; return; } cudaGraphicsMapResources(1, cudaResource, 0); } int getCudaBufferInit(int argc, char *argv[]) { try { /*Initialize trace*/ InitTrace(); /* Parse command line for frame grabber configuration options */ PHX_CHECK(PhxCommonParseCmd(argc, argv, &sPhxCmd)); if (NULL != sPhxCmd.pszConfigFileName) { PHX_CHECK(PhxCommonParseCxpRegs(sPhxCmd.pszConfigFileName, &sCameraRegs)); } /* Configure acquisition */ PHX_CHECK(PHX_Create(&hCamera, PHX_ErrHandlerDefault)); PHX_CHECK(PHX_ParameterSet(hCamera, PHX_CONFIG_FILE, &sPhxCmd.pszConfigFileName)); PHX_CHECK(PHX_ParameterSet(hCamera, PHX_BOARD_NUMBER, &sPhxCmd.eBoardNumber)); PHX_CHECK(PHX_ParameterSet(hCamera, PHX_CHANNEL_NUMBER, &sPhxCmd.eChannelNumber)); PHX_CHECK(PHX_Open(hCamera)); PHX_CHECK(PHX_ParameterGet(hCamera, PHX_ROI_XLENGTH, &dwBufferWidth)); PHX_CHECK(PHX_ParameterGet(hCamera, PHX_BUF_DST_XLENGTH, &dwBufDstXLength)); PHX_CHECK(PHX_ParameterGet(hCamera, PHX_BUF_DST_YLENGTH, &dwBufferHeight)); PHX_CHECK(PHX_ParameterGet(hCamera, PHX_ACQ_NUM_BUFFERS, &dwAcqNumImages)); etParamValue ePixelFormat; PHX_CHECK(PHX_ParameterGet(hCamera, PHX_BUS_FORMAT, &ePixelFormat)); int nBitDepth; int nBytesPerChannel; switch (ePixelFormat) { case PHX_BUS_FORMAT_MONO8: nBitDepth = 8; nBytesPerChannel = 1; cudaArrayFormat = CU_AD_FORMAT_UNSIGNED_INT8; break; case PHX_BUS_FORMAT_MONO10: nBitDepth = 16; nBytesPerChannel = 2; cudaArrayFormat = CU_AD_FORMAT_UNSIGNED_INT16; break; case PHX_BUS_FORMAT_MONO12: nBitDepth = 16; nBytesPerChannel = 2; cudaArrayFormat = CU_AD_FORMAT_UNSIGNED_INT16; break; case PHX_BUS_FORMAT_MONO14: nBitDepth = 16; nBytesPerChannel = 2; cudaArrayFormat = CU_AD_FORMAT_UNSIGNED_INT16; break; case PHX_BUS_FORMAT_MONO16: nBitDepth = 16; nBytesPerChannel = 2; cudaArrayFormat = CU_AD_FORMAT_UNSIGNED_INT16; break; default: throw runtime_error("Error: the pixel format defined in the configuration" " file is not currently supported.\n"); break; } /* Check if camera is CXP */ tFlag fCameraIsCxp; if (PHX_OK != PhxCommonIsCxp(hCamera, &fCameraIsCxp)) { throw runtime_error("Failed retrieving Camera Interface Type."); } /* Initialise CUDA context and set it as current */ CUcontext cuCtx; CUDA_CHECK(cuInit(0)); CUDA_CHECK(cuCtxCreate(&cuCtx,0,0)); CUDA_CHECK(cuCtxSetCurrent(cuCtx)); cudaBuffersInput = new CUdeviceptr[dwAcqNumImages]; cudaBuffersOutput = new CUdeviceptr[dwAcqNumImages]; AllocCudaBuffers(cudaBuffersInput, dwAcqNumImages, dwBufferWidth, dwBufferHeight, dwNumChannels, nBytesPerChannel); AllocCudaBuffers(cudaBuffersOutput, dwAcqNumImages, dwBufferWidth, dwBufferHeight, dwNumChannels, nBytesPerChannel); pImageBuff = InitAcqBuf(dwAcqNumImages,cudaBuffersInput, hCamera); if (!pImageBuff) { printf("Failed allocating PHX acquisition buffers.\n"); return 1; } /* Setup virtual buffers */ PHX_CHECK(PHX_ParameterSet(hCamera, PHX_DST_PTRS_VIRT, pImageBuff)); eParamValue = PHX_DST_PTR_USER_CUDA; PHX_CHECK(PHX_ParameterSet(hCamera, (etParam)(PHX_DST_PTR_TYPE | PHX_CACHE_FLUSH | PHX_FORCE_REWRITE), &eParamValue)); PHX_CHECK(PHX_ParameterSet(hCamera, PHX_EVENT_CONTEXT, (void *) &nCurrentEventCount)); /*Enable Buffer Start interupt*/ etParamValue pValue = PHX_INTRPT_FRAME_START; PHX_CHECK(PHX_ParameterSet(hCamera, PHX_INTRPT_SET, &pValue)); pValue = PHX_INTRPT_FRAME_END; PHX_CHECK(PHX_ParameterSet(hCamera, PHX_INTRPT_SET, &pValue)); /* Start acquisition */ PHX_CHECK(PHX_StreamRead(hCamera, PHX_START, reinterpret_cast(phx_callback))); if (fCameraIsCxp && sCameraRegs.dwAcqStartAddress) { PHX_CHECK(PhxCommonWriteCxpReg(hCamera, sCameraRegs.dwAcqStartAddress, sCameraRegs.dwAcqStartValue, 800)); } launchKernelInit(); while (!PhxCommonKbHit()) { clock_t start,end; start = clock(); /* Get buffer from video device */ if (nCurrentEventCount != nLastEventCount) { /*Enable TimeStamping for cuda processing*/ SETUP_TRACE; START_TRACE; clock_t start_tmp,end_tmp; if (nLastEventCount == 0) { start_tmp = clock(); } if (!(nLastEventCount % 100) && nLastEventCount > 0) { printf("Processed %d images.\r\n", nLastEventCount); end_tmp = clock(); //结束时间 cout<<"Processed %d images. time = "<<1000*double(end_tmp-start_tmp)/ CLOCKS_PER_SEC<<"ms"<