Is it possible to convert part of this code to CUDA to improve its speed?

I need help converting part of this code to CUDA. Would doing so improve its speed?

#!/usr/bin/env python3
# coding: utf-8
# original code https://github.com/verhovsky/squircle/blob/master/squircle.py

import cv2
import math
import time
import numpy

# Threshold below which a unit coordinate is treated as lying on an axis;
# _stretch_square_to_disc returns such points unchanged.
_epsilon = 1e-10

def _sgn(x):
    """Return the sign of ``x`` as a float: -1.0, 0.0 or 1.0."""
    if x < 0:
        return -1.0
    # Zero (of either sign) maps to 0.0; anything that is neither negative
    # nor zero yields 1.0.
    return 0.0 if x == 0.0 else 1.0

def _pixel_coordinates_to_unit(coordinate, max_value):
    """Rescale ``coordinate`` so that 0 maps to -1 and ``max_value`` maps to 1."""
    fraction = coordinate / max_value
    return fraction * 2 - 1

def _one_coordinates_to_pixels(coordinate, max_value):
    """Rescale ``coordinate`` so that -1 maps to 0 and 1 maps to ``max_value``."""
    zero_to_one = (coordinate + 1) / 2
    return zero_to_one * max_value

def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``|d| / sqrt(x*x + y*y)``.

    ``d`` is whichever of ``x`` and ``y`` has the larger square (``y`` on a
    tie).  Points with either coordinate closer to zero than ``_epsilon``
    are returned unchanged, which also avoids dividing by a zero hypotenuse.

    Returns:
        A ``(x, y)`` tuple with both coordinates scaled by the same factor.
    """
    on_axis = abs(x) < _epsilon or abs(y) < _epsilon
    if on_axis:
        return x, y

    x_sq, y_sq = x * x, y * y
    inverse_hypotenuse = 1.0 / math.sqrt(x_sq + y_sq)

    # The coordinate with the larger square drives the scale factor.
    dominant = x if x_sq > y_sq else y
    scale = _sgn(dominant) * dominant * inverse_hypotenuse

    return x * scale, y * scale

def _stretch_grid_to_disc(unit_x, unit_y):
    """Apply the square-to-disc stretch to whole coordinate arrays at once.

    Array counterpart of ``_stretch_square_to_disc``: ``unit_x`` and
    ``unit_y`` are broadcast against each other and every point is scaled
    by ``|d| / sqrt(x*x + y*y)``, where ``d`` is the coordinate with the
    larger square.  Points with a coordinate closer to zero than
    ``_epsilon`` are left unchanged.
    """
    x_sq = unit_x * unit_x
    y_sq = unit_y * unit_y

    # A point at the exact origin has a zero hypotenuse.  It is an on-axis
    # point and its multiplier is replaced below, so the inf/nan produced
    # here is only silenced, never used.
    with numpy.errstate(divide="ignore", invalid="ignore"):
        reciprocal_hypotenuse = 1.0 / numpy.sqrt(x_sq + y_sq)
        dominant = numpy.where(x_sq > y_sq, numpy.abs(unit_x), numpy.abs(unit_y))
        multiplier = dominant * reciprocal_hypotenuse

    on_axis = (numpy.abs(unit_x) < _epsilon) | (numpy.abs(unit_y) < _epsilon)
    multiplier = numpy.where(on_axis, 1.0, multiplier)

    return unit_x * multiplier, unit_y * multiplier


def _transform(inp):
    """Resample ``inp`` through the square-to-disc coordinate mapping.

    Each output pixel ``(x, y)`` is converted to unit coordinates, stretched
    with the square-to-disc formula, converted back to pixel coordinates and
    floored; the output pixel then takes the value of the input pixel at
    that location.  The whole mapping is evaluated with NumPy array
    operations instead of a per-pixel Python loop.

    Args:
        inp: Array-like image indexed as ``inp[x][y]``, with any number of
            trailing axes (for example colour channels).

    Returns:
        A new array with the same shape and dtype as ``inp``.  Pixels whose
        source index falls outside the image are left at zero.

    Raises:
        TypeError: If ``inp`` is non-empty and has fewer than two dimensions.
    """
    image = numpy.asarray(inp)
    result = numpy.zeros_like(image)
    if image.size == 0:
        return result
    if image.ndim < 2:
        raise TypeError("expected an image with at least two dimensions")

    height, width = image.shape[:2]

    # Column vector of x coordinates and row vector of y coordinates; they
    # broadcast to the full (height, width) grid inside the stretch.
    unit_x = _pixel_coordinates_to_unit(numpy.arange(height), height)[:, numpy.newaxis]
    unit_y = _pixel_coordinates_to_unit(numpy.arange(width), width)[numpy.newaxis, :]

    u, v = _stretch_grid_to_disc(unit_x, unit_y)

    rows = numpy.floor(_one_coordinates_to_pixels(u, height)).astype(numpy.intp)
    cols = numpy.floor(_one_coordinates_to_pixels(v, width)).astype(numpy.intp)

    # Copy only where the source index is one that plain indexing accepts
    # (negative indices wrap around); every other output pixel stays zero.
    usable = (
        (rows >= -height) & (rows < height)
        & (cols >= -width) & (cols < width)
    )
    result[usable] = image[rows[usable], cols[usable]]

    return result

# -- load and test

# cv2.imread reports failure by returning None rather than raising, so a
# missing or undecodable file is turned into an explicit error here instead
# of an opaque TypeError when the result is sliced.
img = cv2.imread('circle.png')
if img is None:
    raise FileNotFoundError(
        "circle.png could not be read (missing or not a decodable image)"
    )

# perf_counter is a monotonic, high-resolution clock, so the measured
# interval is not affected by wall-clock adjustments.
started = time.perf_counter()
squareImage = _transform(img[0:224, 0:224])
elapsed = round((time.perf_counter() - started) * 1000)
print(str(elapsed) + ' ms to squareImage')

# Show the result and block until a key is pressed in the window.
cv2.imshow('square', squareImage)
key = cv2.waitKey(0)

cv2.destroyAllWindows()