I’m trying to compile the example code from Section 3.2.5 of the CUDA reference manual, but get the following error from ptxas:
[i]1>ptxas C:/Temp/tmpxft_00002200_00000000-7_surfTest.compute_10.ptx, line 48; error : Parsing error near ‘.surf’: syntax error
1>ptxas fatal : Ptx assembly aborted due to errors[/i]
I get the same errors from my other code when I simply add the line
surface<void,2> surfRef;
System spec:
Windows XP
CUDA +SDK v3.1
Quadro FX 3700 (Compute Capability 1.1)
Visual Studio 2008 Express
DisplayDriver 257.21
Code snippet below:
[codebox]#include <stdio.h>
#include <string.h>
#include <cutil_inline.h>
#include <cuda_runtime_api.h>
// 2D surface references, declared at file scope.
// surfTest() binds each one to a CUDA array; copyKernel reads from
// inputSurfRef and writes to outputSurfRef.
// NOTE(review): surface references presumably require a compute capability
// 2.0+ build target (e.g. -arch=sm_20) — confirm against the toolkit docs.
surface<void, 2> inputSurfRef;
surface<void, 2> outputSurfRef;
// Simple copy kernel: each in-range thread copies one uchar4 texel from
// inputSurfRef to outputSurfRef.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) handles texel (x, y).
// The grid may overshoot the image, out-of-range threads do nothing.
//
// width, height: image extent in texels. Non-positive values make the kernel
//                a no-op.
//
// Requires compute capability 2.0+ (surface load/store); compiling for
// compute_1x makes ptxas reject the generated `.surf` declarations.
__global__ void copyKernel(int width, int height) {
    // The coordinates below are unsigned, so a negative extent would be
    // converted to a huge unsigned value in the bounds test and let every
    // thread through. Reject non-positive extents up front.
    if (width <= 0 || height <= 0) {
        return;
    }

    // Calculate surface coordinates
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x < static_cast<unsigned int>(width) &&
        y < static_cast<unsigned int>(height)) {
        uchar4 data;
        // The surface functions take the x coordinate in bytes, not in
        // elements, hence the scaling by the texel size.
        const int xBytes = static_cast<int>(x * sizeof(uchar4));
        const int row = static_cast<int>(y);
        // Read from input surface
        surf2Dread(&data, inputSurfRef, xBytes, row);
        // Write to output surface
        surf2Dwrite(data, outputSurfRef, xBytes, row);
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool surfTestOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "surfTest: %s failed: %s\n", what,
            cudaGetErrorString(status));
    return false;
}

// Host driver for the surface copy example.
//
// Allocates two 10x10 uchar4 CUDA arrays with surface load/store enabled,
// uploads a zero-filled host buffer into the input array, binds both arrays
// to the file-scope surface references, launches copyKernel, and releases
// everything again. Any failing CUDA call is reported on stderr; the
// remaining steps are skipped but cleanup always runs.
extern "C" void surfTest() {
    const int width = 10;
    const int height = 10;
    // One uchar4 (4 x 8-bit channels) per texel, matching channelDesc below.
    const size_t size = (size_t)width * (size_t)height * sizeof(uchar4);

    cudaArray* cuInputArray = NULL;
    cudaArray* cuOutputArray = NULL;

    // Zero-initialised so the upload does not read indeterminate memory.
    uchar4* h_data = (uchar4*)calloc((size_t)width * (size_t)height,
                                     sizeof(uchar4));
    if (h_data == NULL) {
        fprintf(stderr, "surfTest: host allocation of %lu bytes failed\n",
                (unsigned long)size);
        return;
    }

    // Allocate CUDA arrays in device memory
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);

    // Each step only runs if all previous ones succeeded (short-circuit &&).
    bool ok =
        surfTestOk(cudaMallocArray(&cuInputArray, &channelDesc, width, height,
                                   cudaArraySurfaceLoadStore),
                   "cudaMallocArray(input)") &&
        surfTestOk(cudaMallocArray(&cuOutputArray, &channelDesc, width, height,
                                   cudaArraySurfaceLoadStore),
                   "cudaMallocArray(output)") &&
        // Copy to device memory some data located at address h_data
        // in host memory
        surfTestOk(cudaMemcpyToArray(cuInputArray, 0, 0, h_data, size,
                                     cudaMemcpyHostToDevice),
                   "cudaMemcpyToArray") &&
        // Bind the arrays to the surface references
        surfTestOk(cudaBindSurfaceToArray(inputSurfRef, cuInputArray),
                   "cudaBindSurfaceToArray(input)") &&
        surfTestOk(cudaBindSurfaceToArray(outputSurfRef, cuOutputArray),
                   "cudaBindSurfaceToArray(output)");

    if (ok) {
        // Invoke kernel; ceil-div so partial tiles at the edges are covered.
        dim3 dimBlock(16, 16);
        dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                     (height + dimBlock.y - 1) / dimBlock.y);
        copyKernel<<<dimGrid, dimBlock>>>(width, height);
        // A launch does not return a status; query it explicitly.
        surfTestOk(cudaGetLastError(), "copyKernel launch");
    }

    // Free device memory (only what was actually allocated)
    if (cuInputArray != NULL) {
        surfTestOk(cudaFreeArray(cuInputArray), "cudaFreeArray(input)");
    }
    if (cuOutputArray != NULL) {
        surfTestOk(cudaFreeArray(cuOutputArray), "cudaFreeArray(output)");
    }

    // Free host memory
    free(h_data);
}
[/codebox]
Unfortunately there are no examples that I could find in the SDK which use surfaces, so I can’t test whether these compile.
Any ideas?
- Richard