CudaMallocManaged with OpenCV Mat

I wrote a sample program to understand how cudaMallocManaged works together with OpenCV's Mat and GpuMat, but it does not produce the expected result.

Sample code provided.
#include <stdio.h>
#include <iostream>
#include <opencv2/core/core.hpp>
#include "opencv2/cudastereo.hpp"
#include "opencv2/cudaarithm.hpp"
#include <opencv2/highgui/highgui.hpp>
#include <cuda_runtime.h>
#include <sys/time.h>
#include <math.h>

using namespace cv;

int main(void)
{
  int rows = 480;
  int cols = 1280;
  float* h_a, *h_b, *h_c;

//Allocate memory for device pointers
cudaMallocManaged(&h_a, sizeof(float)rowscols);
cudaMallocManaged(&h_b, sizeof(float)rowscols);
cudaMallocManaged(&h_c, sizeof(float)rowscols);

//Mats (declaring them using pointers)
Mat hmat_a(Size(cols, rows), CV_32F, h_a);
hmat_a = imread("/home/vishnu/Desktop/color.png", 0);
Mat hmat_b(Size(cols, rows), CV_32F, h_b);
hmat_b = imread("/home/vishnu/Desktop/color.png", 0);
Mat hmat_c(Size(cols, rows), CV_32F, h_c);

//Gpu Mats (declaring with the same pointers!)
cuda::GpuMat dmat_a(Size(cols, rows), CV_32F, h_a);
cuda::GpuMat dmat_b(Size(cols, rows), CV_32F, h_b);
cuda::GpuMat dmat_c(Size(cols, rows), CV_32F, h_c);

cuda::multiply(dmat_a, dmat_b, dmat_c);

std::cout << hmat_c << std::endl;

  return 0;
}