Hello,
I have been trying to load and read a texture using CUDA. I have been trying to understand the API for a week now and there are some explanations about the API I really don’t understand. I am trying to read an image from a 3 channel RGB file using the opencv lib and load the image as a texture to perform some image processing on it. I am having trouble just reading it in and out. Please take a look at my code and let me know what I am doing wrong…
using namespace cv;
cudaTextureObject_t texObjLinear;
constexpr bool CV_SUCCESS = true;
cudaArray* d_imageArray = 0;
size_t pitch;
uchar* d_img;
#define checkCudaErrors(call) { \
const cudaError_t error = call; \
if (error != cudaSuccess) {\
printf("Error : %s:%d, ", __FILE__, __LINE__); \
printf("code:%d, reason : %s\n", error, cudaGetErrorName(error)); \
exit(-10 * error);\
} \
} \
#define CHECK_CV(call) { \
const bool error = call; \
if (error != CV_SUCCESS) {\
printf("Error : %s:%d, ", __FILE__, __LINE__); \
printf("code:%d, reason\n", error); \
exit(-10 * error);\
} \
} \
__global__ void extractGradients(uchar* output, const cudaTextureObject_t texObj, int width, int height) {
// Calculate normalized texture coordinates
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
float u = x / (float)width;
float v = y / (float)height;
// Transform coordinates
//u -= 0.5f;
//v -= 0.5f;
printf("%d \n", tex2D<uchar>(texObj, x * 3, y));
//B G R stride
output[y * (width * 3) + (x * 3)] = tex2D<uchar>(texObj, x * 3, y);
output[y * (width * 3) + (x * 3) + 1] = tex2D<uchar>(texObj, x * 3 + 1, y);
output[y * (width * 3) + (x * 3) + 2] = tex2D<uchar>(texObj, x * 3 + 2, y);
}
void initTexture(int w, int h, cv::Mat& _img) {
// allocate array and copy image data
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
//cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
checkCudaErrors(cudaMallocArray(&d_imageArray, &channelDesc, w, h));
checkCudaErrors(cudaMemcpy2DToArray(d_imageArray, 0, 0, _img.data, w * sizeof(uchar) * 3,
w * sizeof(uchar) * 3, h, cudaMemcpyHostToDevice));
cudaResourceDesc texRes;
memset(&texRes, 0, sizeof(cudaResourceDesc));
texRes.resType = cudaResourceTypeArray;
texRes.res.array.array = d_imageArray;
cudaTextureDesc texDescr;
memset(&texDescr, 0, sizeof(cudaTextureDesc));
texDescr.normalizedCoords = false;
texDescr.filterMode = cudaFilterModePoint;
texDescr.addressMode[0] = cudaAddressModeClamp;
texDescr.addressMode[1] = cudaAddressModeClamp;
//texDescr.readMode = cudaReadModeNormalizedFloat;
texDescr.readMode = cudaReadModeElementType;
checkCudaErrors(cudaCreateTextureObject(&texObjLinear, &texRes, &texDescr, NULL));
}
void initTexture2(int w, int h, cv::Mat& _img) {
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
checkCudaErrors(cudaMallocPitch(&d_img, &pitch, _img.step1(), h));
checkCudaErrors(cudaMemcpy2D(d_img, pitch, _img.data, _img.step1(),
w * 3 * sizeof(uchar), h, cudaMemcpyHostToDevice));
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaResourceDesc texRes;
memset(&texRes, 0, sizeof(cudaResourceDesc));
texRes.resType = cudaResourceTypePitch2D;
texRes.res.pitch2D.devPtr = d_img;
texRes.res.pitch2D.desc = desc;
texRes.res.pitch2D.width = w;
texRes.res.pitch2D.height = h;
texRes.res.pitch2D.pitchInBytes = pitch;
cudaTextureDesc texDescr;
memset(&texDescr, 0, sizeof(cudaTextureDesc));
texDescr.normalizedCoords = false;
texDescr.filterMode = cudaFilterModePoint;
texDescr.addressMode[0] = cudaAddressModeClamp;
texDescr.addressMode[1] = cudaAddressModeClamp;
texDescr.readMode = cudaReadModeElementType;
//texDescr.readMode = cudaReadModeNormalizedFloat;
checkCudaErrors(cudaCreateTextureObject(&texObjLinear, &texRes, &texDescr, NULL));
}
int main() {
try {
returnGPUCudaInfoResources(0);
//const std::string filename = "C:\\Users\\karratdev\\Projects\\DelauneyTesselation\\standard_test_images\\standard_test_images\\lena_color_256.tif";
const std::string filename = ".\\standard_test_images\\standard_test_images\\lena_color_256.tif";
cv::Mat image = imread(filename, IMREAD_COLOR);
/*
uchar* testOutImage;
testOutImage = (uchar*)malloc(image.cols * image.rows * sizeof(uchar) * 3);
memcpy(testOutImage, image.data, image.cols * image.rows * sizeof(uchar) * 3);
cv::Mat imageOutTest = cv::Mat(image.rows, image.cols, CV_8UC3, testOutImage);
CHECK_CV(imwrite(".\\standard_test_images\\standard_test_images\\lena_color_256_test.png", imageOutTest));
*/
if (image.empty()) {
printf("Cannot read image file: %s\n", filename.c_str());
return -1;
}
Point pt; pt.x = 10; pt.y = 8;
cv::circle(image, pt, 2, 1);
//initTexture(image.cols, image.rows, image);
initTexture2(image.cols, image.rows, image);
// Allocate result of transformation in device memory
uchar* d_output;
checkCudaErrors(cudaMalloc((void **) &d_output, image.cols * image.rows * sizeof(uchar) * 3));
uchar* gpuRef;
gpuRef = (uchar*)malloc(image.cols * image.rows * sizeof(uchar) * 3);
// Invoke kernel
dim3 dimBlock(16, 16);
dim3 dimGrid((image.cols + dimBlock.x - 1) / dimBlock.x,
(image.rows + dimBlock.y - 1) / dimBlock.y);
printf("Kernel Dimension :\n Block size : %i , %i \n Grid size : %i , %i",
dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y);
extractGradients <<< dimGrid, dimBlock >>> (d_output, texObjLinear, image.cols, image.rows);
checkCudaErrors(cudaMemcpy(gpuRef, d_output, image.cols * image.rows * sizeof(uchar) * 3, cudaMemcpyDeviceToHost));
cv::Mat imageOut = cv::Mat(image.rows, image.cols, CV_8UC3, gpuRef);
CHECK_CV(imwrite(".\\standard_test_images\\standard_test_images\\lena_color_256.jpg", imageOut));
if (CV_SUCCESS == true) printf("\nSuccess !");
}
catch (Exception ex) {
std::cerr << ex.what() << std::endl;
}
return 0;
}
This should be easy, I come from openCL and I didn’t have any problems but I am not understanding the texture CUDA system.
Here is a sample of my output :
This is my original lena RGB :
lena_color_256.tif (192.4 KB)