Hello all, I have implemented the VHGW (van Herk/Gil-Werman) algorithm for faster morphological image processing (dilation and erosion).

I implemented it in MATLAB, both as plain CPU code and using MATLAB's Parallel Computing Toolbox. The problem is that with the Parallel Computing Toolbox, the GPU version is slower than the CPU version. This should not be the case.

Could someone please implement this piece of code using CUDA 7.5?

Below is my MATLAB code:

% Reset the session: clear the command window, remove all workspace
% variables (clearvars; "clear vars" would only remove a variable literally
% named "vars"), and close any open figures.
clc;
clearvars;
close all;

% Load the input image. Convert to grayscale only when the file has three
% colour channels, since rgb2gray expects an RGB image.
img = imread('D:\Desktop Works\TexturesCom_AnimalsVarious0015_1_S.jpg');
if size(img, 3) == 3
    img = rgb2gray(img);
end

%img = [114 16 251 37];

% Width of the flat 1-D structuring element. It must be odd so that
% (p - 1)/2 is a whole number of columns on each side of the centre.
p = input('Enter the Structure element size(must be odd):');
if ~isscalar(p) || ~isnumeric(p) || p < 1 || mod(p, 2) ~= 1
    error('Structure element size must be a positive odd integer.');
end

% Image dimensions; cols determines how much horizontal padding is needed.
[rows, cols] = size(img);

%disp(mod(cols,p));
%img = gpuArray(img);

% Extra columns that bring the width up to a multiple of p, split between
% the left and right side. Nothing is added when cols already divides by p.
left_pad = 0;
right_pad = 0;
if mod(cols, p) ~= 0
    pad = p - mod(cols, p);
    left_pad = pad/2;
    right_pad = pad - left_pad;
end

% Add the half-width of the structuring element on each side. ceil turns a
% fractional pad/2 into a whole number of columns.
left_pad = ceil(left_pad + (p - 1)/2);
right_pad = ceil(right_pad + (p - 1)/2);

% Width of the padded working row.
ncols = cols + left_pad + right_pad;

%proc_mat = zeros([rows, cols]);

% Working buffers, sized from the padded image rather than a fixed
% 1524-by-1524 guess, so that any image size is preallocated exactly.
%   proc   - padded copy of the image
%   R, S   - per-block running maxima (backward and forward)
%   result - merged maxima on the padded grid
%   output - result cropped back to the image size
R = zeros(rows, ncols);
S = zeros(rows, ncols);
proc = zeros(rows, ncols);
result = zeros(rows, ncols);
output = zeros(rows, cols);

% Start timing the row pass.
tic;

% Copy the image into the padded buffer, shifted right by left_pad columns.
to_end = cols;
proc(1:rows, left_pad + (1:to_end)) = img;

% Left border: columns 1..left_pad are set to -128, a value below every
% pixel (assuming non-negative pixel data), so padding never wins a max.
start = 1;
proc(1:rows, start:left_pad) = -128;

% Right border: the right_pad columns after the image get the same value.
start = cols + left_pad + 1;
to_end = start + right_pad - 1;
proc(1:rows, start:to_end) = -128;

% Backward running maximum per block.
% Columns are grouped into blocks 1..p, p+1..2p, ... and R(i, j) is the
% maximum of proc(i, j..e), where e is the last column of the block that
% contains j. Walking each block from right to left lets every entry reuse
% the maximum of its right-hand neighbour, so each pixel costs one
% comparison instead of a rescan of up to p-1 elements. All rows are
% processed at once through column-vector operations.
lastR = ncols - p + 1;            % R is only needed for columns 1..lastR
blockEnd = ceil(lastR / p) * p;   % end of the block containing lastR (<= ncols)
for j = blockEnd:-1:1
    if mod(j, p) == 0
        % Last column of a block: restart the running maximum.
        runMax = proc(1:rows, j);
    else
        runMax = max(runMax, proc(1:rows, j));
    end
    if j <= lastR
        R(1:rows, j) = runMax;
    end
end

% Forward running maximum per block.
% Starting at column p, blocks are p..2p-1, 2p..3p-1, ... and S(i, j) is the
% maximum of proc(i, s..j), where s is the first column of the block that
% contains j (a multiple of p). Walking left to right reuses the previous
% column's maximum, so each pixel costs one comparison. All rows are
% processed at once through column-vector operations.
for j = p:ncols
    if mod(j, p) == 0
        % First column of a block: restart the running maximum.
        runMax = proc(1:rows, j);
    else
        runMax = max(runMax, proc(1:rows, j));
    end
    S(1:rows, j) = runMax;
end

% Half-width of the structuring element.
index = (p - 1)/2;

% For a window centred on column j, combine the backward maximum taken at
% its left edge with the forward maximum taken at its right edge.
centers = (index + 1):(ncols - index);
result(1:rows, centers) = max(R(1:rows, centers - index), S(1:rows, centers + index));

% Drop the padding: keep the cols columns that start after left_pad.
output(1:rows, 1:cols) = result(1:rows, left_pad + (1:cols));

% Stop the timer, display the row-pass result, and print the elapsed seconds.
val = toc;

imshow(mat2gray(output));

disp(num2str(val));

% Column pass: transpose the image so that the same row-wise procedure
% runs along the original columns. The transpose operator must be the
% ASCII apostrophe form; .' is the plain (non-conjugate) transpose.
% NOTE(review): this pass starts from the original image, not from the
% row-pass output, so the final result is a column-only operation rather
% than a p-by-p square one - confirm that this is intended.
img1 = img.';

[rows, cols] = size(img1);

% Extra columns that bring the width up to a multiple of p, split between
% the left and right side. Nothing is added when cols already divides by p.
left_pad = 0;
right_pad = 0;
if mod(cols, p) ~= 0
    pad = p - mod(cols, p);
    left_pad = pad/2;
    right_pad = pad - left_pad;
end

% Add the half-width of the structuring element on each side. ceil turns a
% fractional pad/2 into a whole number of columns.
left_pad = ceil(left_pad + (p - 1)/2);
right_pad = ceil(right_pad + (p - 1)/2);

% Width of the padded working row.
ncols = cols + left_pad + right_pad;

% Working buffers for the column pass, sized from the padded (transposed)
% image rather than a fixed 1524-by-1524 guess.
%   proc   - padded copy of the image
%   R, S   - per-block running maxima (backward and forward)
%   result - merged maxima on the padded grid
%   output - result cropped back to the image size
R = zeros(rows, ncols);
S = zeros(rows, ncols);
proc = zeros(rows, ncols);
result = zeros(rows, ncols);
output = zeros(rows, cols);

% Start timing the column pass.
tic;

% Copy the transposed image into the padded buffer, shifted right by
% left_pad columns.
to_end = cols;
proc(1:rows, left_pad + (1:to_end)) = img1;

% Left border: columns 1..left_pad are set to -128, a value below every
% pixel (assuming non-negative pixel data), so padding never wins a max.
start = 1;
proc(1:rows, start:left_pad) = -128;

% Right border: the right_pad columns after the image get the same value.
start = cols + left_pad + 1;
to_end = start + right_pad - 1;
proc(1:rows, start:to_end) = -128;

% Backward running maximum per block (column pass).
% Columns are grouped into blocks 1..p, p+1..2p, ... and R(i, j) is the
% maximum of proc(i, j..e), where e is the last column of the block that
% contains j. Walking each block from right to left lets every entry reuse
% the maximum of its right-hand neighbour, so each pixel costs one
% comparison instead of a rescan of up to p-1 elements. All rows are
% processed at once through column-vector operations.
lastR = ncols - p + 1;            % R is only needed for columns 1..lastR
blockEnd = ceil(lastR / p) * p;   % end of the block containing lastR (<= ncols)
for j = blockEnd:-1:1
    if mod(j, p) == 0
        % Last column of a block: restart the running maximum.
        runMax = proc(1:rows, j);
    else
        runMax = max(runMax, proc(1:rows, j));
    end
    if j <= lastR
        R(1:rows, j) = runMax;
    end
end

% Forward running maximum per block (column pass).
% Starting at column p, blocks are p..2p-1, 2p..3p-1, ... and S(i, j) is the
% maximum of proc(i, s..j), where s is the first column of the block that
% contains j (a multiple of p). Walking left to right reuses the previous
% column's maximum, so each pixel costs one comparison. All rows are
% processed at once through column-vector operations.
for j = p:ncols
    if mod(j, p) == 0
        % First column of a block: restart the running maximum.
        runMax = proc(1:rows, j);
    else
        runMax = max(runMax, proc(1:rows, j));
    end
    S(1:rows, j) = runMax;
end

% Half-width of the structuring element.
index = (p - 1)/2;

% For a window centred on column j, combine the backward maximum taken at
% its left edge with the forward maximum taken at its right edge.
centers = (index + 1):(ncols - index);
result(1:rows, centers) = max(R(1:rows, centers - index), S(1:rows, centers + index));

% Drop the padding: keep the cols columns that start after left_pad.
output(1:rows, 1:cols) = result(1:rows, left_pad + (1:cols));

% Transpose back to the orientation of the original image. The operator
% must be the ASCII apostrophe form; a typographic quote is a syntax error.
output = output.';

% Stop the timer and print the elapsed seconds for the column pass.
val = toc;

disp(num2str(val));