Hello all, I have implemented the VHGW (van Herk/Gil-Werman) algorithm for faster morphological image processing (dilation and erosion).
I have implemented it in MATLAB, and also with MATLAB's Parallel Computing Toolbox. The problem is that with the Parallel Computing Toolbox the GPU version is slower than the CPU version, which should not be the case.
Could someone please implement this piece of code using CUDA 7.5?
The MATLAB code is below:
% --- Setup: reset the session, load the test image, read the window size ---
clc;
clearvars;      % "clear vars" would only remove a variable literally named "vars"
close all;

% Load the image and reduce it to a single grey-level channel.
img = rgb2gray(imread('D:\Desktop Works\TexturesCom_AnimalsVarious0015_1_S.jpg'));
%img = [114 16 251 37];   % tiny hand-checkable test row

% Width of the 1-D structuring element (window) used by both passes below.
p = input('Enter the Structure element size(must be odd):');

% The code below uses (p - 1)/2 as an array offset, so p has to be a
% positive odd integer; reject anything else here instead of failing later
% with an indexing error.
if ~(isnumeric(p) && isscalar(p) && p >= 1 && mod(p, 2) == 1)
    error('Structure element size must be a positive odd integer.');
end
% --- Pass 1 (along rows): work out the padding and allocate buffers ---
[rows, cols] = size(img);
%img = gpuArray(img);

% Extra columns needed to bring the row length up to a multiple of p,
% split between the left and right side.
if mod(cols, p) == 0
    left_pad = 0;
    right_pad = 0;
else
    pad = p - mod(cols, p);
    left_pad = pad/2;
    right_pad = pad - left_pad;
end

% Add half a window on each side so the window can be centred on the first
% and last image column. ceil() turns a fractional split (odd "pad") into
% whole columns.
left_pad = ceil(left_pad + (p - 1)/2);
right_pad = ceil(right_pad + (p - 1)/2);
ncols = cols + left_pad + right_pad;

% Buffers sized from the actual image instead of a fixed 1524x1524:
%   proc   - padded copy of the image
%   R, S   - per-block running maxima of proc
%   result - filtered padded rows
%   output - result cropped back to the image size
proc = zeros(rows, ncols);
R = zeros(rows, ncols);
S = zeros(rows, ncols);
result = zeros(rows, ncols);
output = zeros(rows, cols);
% --- Pass 1: build the padded working copy (timed section starts here) ---
tic;

% Image pixels go into the middle of proc, shifted right by left_pad.
start = 1;
to_end = cols;
proc(1:rows, (start:to_end) + left_pad) = img(1:rows, start:to_end);

% Left border: filled with -128, which is below any non-negative pixel
% value and so never wins a maximum.
proc(1:rows, start:left_pad) = -128;

% Right border: same fill value, for the right_pad columns after the image.
start = cols + left_pad + 1;
to_end = start + right_pad - 1;
proc(1:rows, start:to_end) = -128;
% --- Pass 1: R(:, j) = max of proc(:, j .. end of j's block) ---
% Blocks are p columns wide and end at columns p, 2p, 3p, ...
% Instead of rescanning the rest of the block for every column, sweep each
% block once from right to left carrying a running maximum, and process all
% rows at once as a column vector.
last_col = ncols - p + 1;               % last column of R that is filled
scan_from = ceil(last_col / p) * p;     % end of the block holding last_col (<= ncols)
for j = scan_from:-1:1
    if mod(j, p) == 0
        % Last column of a block: the running maximum restarts here.
        % (scan_from is a multiple of p, so run_max is always set on the
        % first iteration.)
        run_max = proc(1:rows, j);
    else
        run_max = max(run_max, proc(1:rows, j));
    end
    if j <= last_col
        % Columns past last_col only feed the running maximum; they are
        % left untouched in R.
        R(1:rows, j) = run_max;
    end
end
% --- Pass 1: S(:, j) = max of proc(:, start of j's block .. j) ---
% For S the blocks start at columns p, 2p, 3p, ... (the first value is
% written at column p). Each value is derived from its left neighbour, so
% every pixel is visited once; all rows are processed together.
for j = p:ncols
    if mod(j, p) == 0
        % First column of a block: the running maximum restarts here.
        S(1:rows, j) = proc(1:rows, j);
    else
        S(1:rows, j) = max(S(1:rows, j - 1), proc(1:rows, j));
    end
end
% --- Pass 1: merge the two running maxima, crop, and report ---
index = (p - 1)/2;                        % half window width

% For a window centred on column c, R at its left edge and S at its right
% edge together cover the whole window.
centres = (index + 1):(ncols - index);
result(1:rows, centres) = max(R(1:rows, centres - index), S(1:rows, centres + index));

% Drop the padding: image column c lives at padded column c + left_pad.
output(1:rows, 1:cols) = result(1:rows, (1:cols) + left_pad);

val = toc;                                % elapsed time of pass 1
imshow(mat2gray(output));
disp(num2str(val));
% --- Pass 2 (along columns): transpose, then reuse the row-wise scheme ---
% NOTE(review): this pass starts again from the original img, not from the
% "output" of pass 1. A separable p-by-p filter would feed pass 1's output
% in here - confirm which is intended.
img1 = img';
[rows, cols] = size(img1);

% Extra columns needed to bring the row length up to a multiple of p,
% split between the left and right side.
if mod(cols, p) == 0
    left_pad = 0;
    right_pad = 0;
else
    pad = p - mod(cols, p);
    left_pad = pad/2;
    right_pad = pad - left_pad;
end

% Add half a window on each side; ceil() turns a fractional split into
% whole columns.
left_pad = ceil(left_pad + (p - 1)/2);
right_pad = ceil(right_pad + (p - 1)/2);
ncols = cols + left_pad + right_pad;

% Fresh buffers sized from the transposed image instead of a fixed
% 1524x1524.
proc = zeros(rows, ncols);
R = zeros(rows, ncols);
S = zeros(rows, ncols);
result = zeros(rows, ncols);
output = zeros(rows, cols);
% --- Pass 2: build the padded working copy (timed section starts here) ---
tic;

% Transposed-image pixels go into the middle of proc, shifted right by
% left_pad.
start = 1;
to_end = cols;
proc(1:rows, (start:to_end) + left_pad) = img1(1:rows, start:to_end);

% Left border: filled with -128, which is below any non-negative pixel
% value and so never wins a maximum.
proc(1:rows, start:left_pad) = -128;

% Right border: same fill value, for the right_pad columns after the image.
start = cols + left_pad + 1;
to_end = start + right_pad - 1;
proc(1:rows, start:to_end) = -128;
% --- Pass 2: R(:, j) = max of proc(:, j .. end of j's block) ---
% Blocks are p columns wide and end at columns p, 2p, 3p, ...
% Sweep each block once from right to left carrying a running maximum
% (instead of rescanning the rest of the block for every column), and
% process all rows at once as a column vector.
last_col = ncols - p + 1;               % last column of R that is filled
scan_from = ceil(last_col / p) * p;     % end of the block holding last_col (<= ncols)
for j = scan_from:-1:1
    if mod(j, p) == 0
        % Last column of a block: the running maximum restarts here.
        % (scan_from is a multiple of p, so run_max is always set on the
        % first iteration.)
        run_max = proc(1:rows, j);
    else
        run_max = max(run_max, proc(1:rows, j));
    end
    if j <= last_col
        % Columns past last_col only feed the running maximum; they are
        % left untouched in R.
        R(1:rows, j) = run_max;
    end
end
% --- Pass 2: S(:, j) = max of proc(:, start of j's block .. j) ---
% For S the blocks start at columns p, 2p, 3p, ... (the first value is
% written at column p). Each value is derived from its left neighbour, so
% every pixel is visited once; all rows are processed together.
for j = p:ncols
    if mod(j, p) == 0
        % First column of a block: the running maximum restarts here.
        S(1:rows, j) = proc(1:rows, j);
    else
        S(1:rows, j) = max(S(1:rows, j - 1), proc(1:rows, j));
    end
end
% --- Pass 2: merge the two running maxima, crop, and undo the transpose ---
index = (p - 1)/2;                        % half window width

% For a window centred on column c, R at its left edge and S at its right
% edge together cover the whole window.
centres = (index + 1):(ncols - index);
result(1:rows, centres) = max(R(1:rows, centres - index), S(1:rows, centres + index));

% Drop the padding: column c of the transposed image lives at padded
% column c + left_pad.
output(1:rows, 1:cols) = result(1:rows, (1:cols) + left_pad);

% Back to the orientation of the original image (plain apostrophe: the
% typographic quote is not a valid transpose operator).
output = output';

val = toc;                                % elapsed time of pass 2
disp(num2str(val));