Hello, I’m new to Cuda programming and I am trying it out implementing a simple histogram equalization algorithm. I already have had success equalizaing a grayscale image, but now i am trying to use the same logic to work on a color image but it doesn’t work and i really don’t know why. This is the code for converting the color space and calculating the histogram of the Y values (luminance)
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import numpy as np
import cv2
from time import perf_counter
# CUDA kernel for histogram equalization
kernel = """
__global__ void histogram_calc(unsigned char *input_image, unsigned char *output_image, int *hist_gpu, int width, int height)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int index = y * width + x;
if (x < width && y < height)
{
int R = input_image[3*index];
printf("test");
int G = input_image[3*index + 1];
int B = input_image[3*index + 2];
int Y = 0.299 * R + 0.587 * G + 0.114 * B;
int U = -0.169 * R - 0.331 * G + 0.499 * B + 128;
int V = 0.499 * R - 0.418 * G - 0.0813 * B + 128;
output_image[3*index] = Y;
output_image[3*index + 1] = U;
output_image[3*index + 2] = V;
atomicAdd(&(hist_gpu[Y]), 1);
}
// Wait for all threads to finish updating the histogram
__syncthreads();
}
"""
if __name__ == "__main__":
# Load the image
input_image_path = "test.png"
input_image = cv2.imread(input_image_path)
#print(input_image)
times = np.array([])
for _ in range(1):
start_time = perf_counter()
height, width = input_image.shape[:2]
print(height)
print(width)
# Define the array for the histogram and the cdf
hist = np.zeros(256, dtype=np.int32)
cdf = np.zeros(256, dtype=np.int32)
# Allocate memory on the GPU
input_image_gpu = cuda.mem_alloc(input_image.nbytes)
output_image_gpu = cuda.mem_alloc(input_image.nbytes)
hist_gpu = cuda.mem_alloc(hist.nbytes)
cdf_gpu = cuda.mem_alloc(cdf.nbytes)
# Copy input image and histogram to the GPU
cuda.memcpy_htod(input_image_gpu, input_image)
# Compile the CUDA kernel and get a reference to the functions
mod = SourceModule(kernel)
histogram_calc_kernel = mod.get_function("histogram_calc")
# Set block and grid dimensions
block_dim = (16, 16, 1)
grid_dim = (int(np.ceil(width / block_dim[0])), int(np.ceil(height / block_dim[1])))
#print(hist)
# Make the histogram and copy it back to Host
histogram_calc_kernel(input_image_gpu, hist_gpu, np.int32(width), np.int32(height), block=block_dim, grid=grid_dim)
cuda.memcpy_dtoh(hist, hist_gpu)
print(hist)
# Calculate the cumulative distribution function (CDF) on CPU and copy it to Device
cdf[0] = hist[0];
for i in range(256):
cdf[i] = hist[i] + cdf[i-1]
print(cdf)
end_time = perf_counter()
#times.append(end_time - start_time)
# Save the output image
output_image_path = "output_image.jpg"
Basically it looks like it never goes inside the if condition in the kernel, but up until there it’s literally the same code i used for grayscale images so i’m kinda lost. Can someone have a look at this? Thanks very much
P.s: I run this on my gtx 960