Hi,
I’m trying to change an image in a kernel function. Whenever I try to change the image, CUDA reports an “unknown error”.
extern "C" __declspec(dllexport) void invert(unsigned char* img, int startX, int startY, int stopX, int stopY, int pixelSize, int stride, int img_length) {
// Filter values
int filterStart = startY * stride + startX * pixelSize;
int filterWidth = stopX - startX;
int filterHeight = stopY - startY;
// Threads properties
int threads = 256;
int blocks = filterHeight / threads + 1;
// Debug information
printf("blocks: %d threads: %d \n", blocks, threads);
printf("invert => filterHeight: %d filterWidth: %d filterStart: %d pixelSize: %d stride: %d img_length: %d \n", filterHeight, filterWidth, filterStart, pixelSize, stride, img_length);
// Allocate memory on device
int* img_d;
cudaMalloc(&img_d, img_length);
// Copy complete image to device
cudaMemcpy(img_d, img, img_length, cudaMemcpyHostToDevice);
// Call kernel function for each line
invertCuda<<<blocks, threads>>>(img, filterHeight, filterWidth, filterStart, pixelSize, stride);
printf("%s\n", cudaGetErrorString(cudaPeekAtLastError()));
printf("%s\n", cudaGetErrorString(cudaThreadSynchronize()));
// Copy image back to host
cudaMemcpy(img, img_d, img_length, cudaMemcpyDeviceToHost);
}
__global__ void invertCuda(unsigned char* img, int filterHeight, int filterWidth, int filterStart, int pixelSize, int stride) {
img[100] = 0;
}
Output is:
blocks: 1 threads: 256
invert => filterHeight: 30 filterWidth: 40 filterStart: 28845 pixelSize: 3 stride: 1920 img_length: 921600
no error
unknown error
This fault only happens, if I try to change something in the image array.