Help Please,I created depth image in opengl and want to compare with another image using cuda
here is how I create texture
glGenTextures(1,&texture);
glBindTexture(GL_TEXTURE_2D,texture);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_GENERATE_MIPMAP, GL_TRUE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT32F, TEXTURE_WIDTH, TEXTURE_HEIGHT, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D,0);
I register texture using cudaGraphicsGLRegisterImage()
however it crash when I want to copy my result back from computation, at line
cudaMemcpyToArray(dstArray,0,0,g_dstBuffer,bufferSize,cudaMemcpyDeviceToDevice);
the problem seem to the size of the array as I don’t know the data type of texture, I try random data type,but not successful Please Help !!
here is the full code
#include <cuda_runtime_api.h>
#include <cuda.h>
#include "cutil.h"
#include "cutil_inline_runtime.h"
#include "math_functions.h"
#define __cplusplus
#define __CUDACC__
#include "texture_fetch_functions.h"
#include "cuda_texture_types.h"
#define BLOCK_SIZE 16
float4* g_dstBuffer = NULL;
float4* g_srcBuffer = NULL;
size_t g_BufferSize =0;
texture<float4, cudaTextureType2D, cudaReadModeElementType> texRef;
__global__ void postProcessKernel(float4* dst, float4* src,unsigned int imgWidth, unsigned int imgHeight)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
if(x>imgWidth || y >imgHeight) return;
unsigned int index = y*imgWidth + x;
float4 tempcolor;
float4 color = tex2D(texRef,x,y);
tempcolor = color;
dst[index] = tempcolor;
__syncthreads();
return;
}
void postProcessCUDA(cudaGraphicsResource_t &dst, cudaGraphicsResource_t &src1, cudaGraphicsResource_t &src2, unsigned int width, unsigned int height)
{
cudaGraphicsResource_t resources[3] = {src1,src2,dst};
cudaGraphicsMapResources(3,resources) ;
cudaArray* src1Array;
cudaArray* src2Array;
cudaArray* dstArray;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(4,0,0,0,cudaChannelFormatKindFloat);
const textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr,"texRef");
cudaGraphicsSubResourceGetMappedArray (&src1Array,src1,0,0);
cudaGraphicsSubResourceGetMappedArray (&src2Array,src2,0,0);
cudaGraphicsSubResourceGetMappedArray (&dstArray,dst,0,0);
cudaBindTextureToArray( texRefPtr,src1Array,&channelDesc);
size_t bufferSize = width*height*sizeof(float4);
if(g_BufferSize != bufferSize)
{
if(g_dstBuffer !=NULL)
{
cudaFree(g_dstBuffer);
}
if(g_srcBuffer != NULL)
{
cudaFree(g_srcBuffer);
}
g_BufferSize = bufferSize;
cudaMalloc((void**) &g_dstBuffer, g_BufferSize);
cudaMalloc((void**) &g_srcBuffer,g_BufferSize);
}
size_t blocksW = (size_t) ceilf (width /(float) BLOCK_SIZE );
size_t blocksH = (size_t) ceilf( height / (float) BLOCK_SIZE);
dim3 gridDim( blocksW,blocksH,1);
dim3 blockDim(BLOCK_SIZE,BLOCK_SIZE,1);
//cudaMemcpyFromArray(g_srcBuffer,src1Array,0,0,bufferSize,cudaMemcpyDeviceToDevice);
postProcessKernel<<<gridDim,blockDim>>>(g_dstBuffer,g_srcBuffer,width,height);
cudaError_t b;
cudaMemcpyToArray(dstArray,0,0,g_dstBuffer,bufferSize,cudaMemcpyDeviceToDevice);
cudaUnbindTexture (texRefPtr);
cudaGraphicsUnmapResources(3,resources);
return ;
}