Using nppiAlphaComp_8u_AC4R from CUDA 9.1 on Windows x64, I composited two 100% white images with 100% opaque alpha (that is, every pixel is #FFFFFFFF), and the result was a mixture of #FFFFFF and #FEFEFE pixels. This looks like strange behavior. Thanks.
import numpy as np
import cv2
import ctypes as ct
import numpy.ctypeslib as npct
from enum import IntEnum
# Directory that holds the CUDA 9.1 DLLs loaded by this script. A raw string
# literal cannot end in a single backslash, so the literal is closed with two;
# the resulting value therefore ends in two backslashes.
DLL_PATH = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.1\bin\\"

# Image width and height in pixels.
WIDTH = 512
HEIGHT = 512

# Number of 8-bit channels per pixel.
N_CH = 4
def _loadl_dll(name):
    """Load the DLL called *name* from ``DLL_PATH`` via ``numpy.ctypeslib``.

    Args:
        name: File name of the library, e.g. ``"cudart64_91.dll"``.

    Returns:
        The library object returned by ``numpy.ctypeslib.load_library``.
    """
    # DLL_PATH already ends with a path separator, so plain concatenation
    # yields the full path; "." is passed as load_library's loader_path.
    return npct.load_library(DLL_PATH + name, ".")
# Library handle on which cudaMemcpy and cudaMemset are looked up.
_dll_rt = _loadl_dll("cudart64_91.dll")
class _cudaMemcpyKind(IntEnum):
    """Values passed as the ``kind`` argument of ``cudaMemcpy``.

    Only the direction this script needs is declared.
    """

    cudaMemcpyDeviceToHost = 2
# cudaError_t cudaMemcpy(void* dst, const void* src, size_t count,
#                        cudaMemcpyKind kind)
# `count` is a size_t, so it is declared as c_size_t instead of a 32-bit int.
_dll_rt.cudaMemcpy.argtypes = [
    ct.POINTER(ct.c_uint8),
    ct.POINTER(ct.c_uint8),
    ct.c_size_t,
    ct.c_int,
]
_dll_rt.cudaMemcpy.restype = ct.c_int


def memcpy_d2h(pt_src, dst):
    """Copy ``dst.nbytes`` bytes from *pt_src* into the NumPy array *dst*.

    Args:
        pt_src: ``POINTER(c_uint8)`` source pointer handed to cudaMemcpy.
        dst: NumPy array that receives the bytes; its size determines how
            many bytes are copied.

    Returns:
        The integer status code returned by ``cudaMemcpy``.
    """
    host_ptr = dst.ctypes.data_as(ct.POINTER(ct.c_uint8))
    return _dll_rt.cudaMemcpy(
        host_ptr,
        pt_src,
        dst.nbytes,
        _cudaMemcpyKind.cudaMemcpyDeviceToHost,
    )
# cudaError_t cudaMemset(void* devPtr, int value, size_t count)
# `count` is a size_t, so it is declared as c_size_t instead of a 32-bit int.
_dll_rt.cudaMemset.argtypes = [ct.POINTER(ct.c_uint8), ct.c_int, ct.c_size_t]
_dll_rt.cudaMemset.restype = ct.c_int


def memset(pt_dst, val, count):
    """Fill *count* bytes starting at *pt_dst* with the byte value *val*.

    Args:
        pt_dst: ``POINTER(c_uint8)`` destination pointer handed to cudaMemset.
        val: Value to store in every byte.
        count: Number of bytes to fill.

    Returns:
        The integer status code returned by ``cudaMemset``.
    """
    return _dll_rt.cudaMemset(pt_dst, val, count)
# Library handle on which nppsMalloc_8u and nppsFree are looked up.
_dll_sf = _loadl_dll("npps64_91.dll")
# nppsMalloc_8u takes a 32-bit element count and returns a pointer to bytes.
_dll_sf.nppsMalloc_8u.argtypes = [ct.c_int32]
_dll_sf.nppsMalloc_8u.restype = ct.POINTER(ct.c_uint8)


def malloc_pt(size):
    """Allocate a buffer of *size* bytes with ``nppsMalloc_8u``.

    Args:
        size: Number of 8-bit elements to allocate.

    Returns:
        A ``POINTER(c_uint8)`` to the new buffer. The result of the native
        call is returned unchecked, so callers should test it for NULL.
    """
    return _dll_sf.nppsMalloc_8u(size)
# void nppsFree(void* pValues)
# Without an explicit restype ctypes assumes an int return and would hand back
# a meaningless value for this void function.
_dll_sf.nppsFree.argtypes = [ct.c_void_p]
_dll_sf.nppsFree.restype = None


def free_pt(pt):
    """Release a buffer previously obtained from ``malloc_pt``.

    Args:
        pt: Pointer returned by ``malloc_pt``.

    Returns:
        None; ``nppsFree`` has no return value.
    """
    return _dll_sf.nppsFree(pt)
# Library handle on which nppiAlphaComp_8u_AC4R is looked up.
_dll_al = _loadl_dll("nppial64_91.dll")
class NppiSize(ct.Structure):
    """ctypes structure with two C ``int`` fields, ``width`` and ``height``.

    It is passed by value as the region-size argument of
    ``nppiAlphaComp_8u_AC4R``.
    """

    _fields_ = [
        ("width", ct.c_int),
        ("height", ct.c_int),
    ]
class _NppiAlphaOp(IntEnum):
    """Values passed as the alpha-operation argument of nppiAlphaComp_8u_AC4R.

    Only the operation this script uses is declared.
    """

    NPPI_OP_ALPHA_OVER = 0
# Argument order: (src1, src1 step, src2, src2 step, dst, dst step,
#                  region size by value, alpha operation).
_dll_al.nppiAlphaComp_8u_AC4R.argtypes = [
    ct.POINTER(ct.c_uint8), ct.c_int32,
    ct.POINTER(ct.c_uint8), ct.c_int32,
    ct.POINTER(ct.c_uint8), ct.c_int32,
    NppiSize,
    ct.c_int32,
]
_dll_al.nppiAlphaComp_8u_AC4R.restype = ct.c_int


def alpha_comp(pt_src_upper, pt_src_lower, pt_dst, step, width, height):
    """Composite two images with ``nppiAlphaComp_8u_AC4R`` using ALPHA_OVER.

    Args:
        pt_src_upper: Pointer passed as the first source image.
        pt_src_lower: Pointer passed as the second source image.
        pt_dst: Pointer passed as the destination image.
        step: Row step value, used for both sources and the destination.
        width: Width of the region to process.
        height: Height of the region to process.

    Returns:
        The integer status code returned by ``nppiAlphaComp_8u_AC4R``.
    """
    roi = NppiSize(width, height)
    return _dll_al.nppiAlphaComp_8u_AC4R(
        pt_src_upper, step,
        pt_src_lower, step,
        pt_dst, step,
        roi,
        _NppiAlphaOp.NPPI_OP_ALPHA_OVER,
    )
def alloc_white():
    """Allocate a WIDTH x HEIGHT x N_CH byte buffer with every byte set to 255.

    Returns:
        Pointer to the newly allocated buffer; release it with ``free_pt``.

    Raises:
        MemoryError: If the allocation returned a NULL pointer.
        RuntimeError: If ``memset`` reported a non-zero status; the buffer is
            freed before raising.
    """
    size = WIDTH * HEIGHT * N_CH
    pt = malloc_pt(size)
    # A NULL ctypes pointer is falsy.
    if not pt:
        raise MemoryError("nppsMalloc_8u failed to allocate %d bytes" % size)
    status = memset(pt, 255, size)
    if status != 0:
        free_pt(pt)
        raise RuntimeError("cudaMemset failed with error code %d" % status)
    return pt
def show(caption, pt):
    """Copy the image at *pt* to the host and display it in an OpenCV window.

    Blocks until a key is pressed, then closes all OpenCV windows.

    Args:
        caption: Window title.
        pt: Pointer to a buffer of HEIGHT * WIDTH * N_CH bytes.

    Raises:
        RuntimeError: If the device-to-host copy reports a non-zero status.
    """
    # The buffers in this script hold N_CH bytes per pixel, so the host array
    # needs a channel axis; a [WIDTH, HEIGHT] array would receive only a
    # quarter of the data and be shown as single-channel.
    cv2buf = np.zeros(shape=(HEIGHT, WIDTH, N_CH), dtype=np.uint8)
    status = memcpy_d2h(pt, cv2buf)
    if status != 0:
        raise RuntimeError("cudaMemcpy failed with error code %d" % status)
    cv2.imshow(caption, cv2buf)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def save(filename, pt):
    """Copy the image at *pt* to the host and write it with ``cv2.imwrite``.

    Args:
        filename: Output path handed to ``cv2.imwrite``.
        pt: Pointer to a buffer of HEIGHT * WIDTH * N_CH bytes.

    Raises:
        RuntimeError: If the device-to-host copy reports a non-zero status.
    """
    # The buffers in this script hold N_CH bytes per pixel, so the host array
    # needs a channel axis; a [WIDTH, HEIGHT] array would receive only a
    # quarter of the data and be written as single-channel.
    cv2buf = np.zeros(shape=(HEIGHT, WIDTH, N_CH), dtype=np.uint8)
    status = memcpy_d2h(pt, cv2buf)
    if status != 0:
        raise RuntimeError("cudaMemcpy failed with error code %d" % status)
    cv2.imwrite(filename, cv2buf)
# Reproduction script: composite two all-0xFF images and show/save the result.
# Every allocated buffer is recorded so that it is released even if a later
# step raises.
_allocated = []
try:
    pt_white1 = alloc_white()
    _allocated.append(pt_white1)
    pt_white2 = alloc_white()
    _allocated.append(pt_white2)

    show("source", pt_white1)
    save("source.png", pt_white1)

    pt_dst = malloc_pt(WIDTH * HEIGHT * N_CH)
    # A NULL ctypes pointer is falsy.
    if not pt_dst:
        raise MemoryError("nppsMalloc_8u failed to allocate the destination")
    _allocated.append(pt_dst)

    status = alpha_comp(pt_white1, pt_white2, pt_dst, WIDTH * N_CH, WIDTH, HEIGHT)
    # Negative NppStatus values are errors; positive values are warnings.
    if status < 0:
        raise RuntimeError(
            "nppiAlphaComp_8u_AC4R failed with NppStatus %d" % status)

    show("result", pt_dst)
    save("result.png", pt_dst)
finally:
    for _pt in _allocated:
        free_pt(_pt)