I have a very simple function where I’m passing in a char array and doing a simple character match. I want to return an array of 1/0 values depending on which characters are matched.
Problem: although I can see the value has been set in the data structure (as I print it in the function after it’s assigned) when the int array is copied back from the device the values aren’t as expected.
I’m sure it’s something silly.
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np
mod = SourceModule("""
__global__ void test(const char *q, const int chrSize, int *d, const int intSize) {
int v = 0;
if( q[threadIdx.x * chrSize] == 'a' || q[threadIdx.x * chrSize] == 'c' ) {
v = 1;
}
d[threadIdx.x * intSize] = v;
printf("x=%d, y=%d, val=%c ret=%d\\n", threadIdx.x, threadIdx.y, q[threadIdx.x * chrSize], d[threadIdx.x * intSize]);
}
""")
func = mod.get_function("test")
# input data
a = np.asarray(['a','b','c','d'], dtype=np.str_)
# allocate/copy to device
a_gpu = cuda.mem_alloc(a.nbytes)
cuda.memcpy_htod(a_gpu, a)
# destination array
d = np.zeros((4), dtype=np.int16)
# allocate/copy to device
d_gpu = cuda.mem_alloc(d.nbytes)
cuda.memcpy_htod(d_gpu, d)
# run the function
func(a_gpu, np.int8(a.dtype.itemsize), d_gpu, np.int8(d.dtype.itemsize), block=(4,1,1))
# copy data back and print
cuda.memcpy_dtoh(d, d_gpu)
print(d)
Output:
x=0, y=0, val=a ret=1
x=1, y=0, val=b ret=0
x=2, y=0, val=c ret=1
x=3, y=0, val=d ret=0
[1 0 0 0]
Expected output:
x=0, y=0, val=a ret=1
x=1, y=0, val=b ret=0
x=2, y=0, val=c ret=1
x=3, y=0, val=d ret=0
[1 0 1 0]
Thanks in advance.
This has also been asked on stack overflow.