Hey guys,
I made my first tries with CUDA. Now I have a little problem with a really simple application. It should just fill an array with some numbers.
[codebox]
#include <cuda.h>
#include <cutil.h>
#include <stdio.h>
// Number of floats in the test array.
#define N 100
// Threads per block used for the kernel launch.
#define BLOCK_SIZE 8
// Blocks per grid, rounded up so the grid covers all N elements even when N is
// not a multiple of BLOCK_SIZE (a plain N / BLOCK_SIZE truncates 100 / 8 to 12
// blocks = 96 threads and leaves the last 4 elements unwritten). Parenthesised
// so the macro expands safely inside larger expressions.
#define BLOCK_COUNT ( ( N + BLOCK_SIZE - 1 ) / BLOCK_SIZE )
// Array descriptor used by both main() and the kernel: an element count plus a
// pointer to the float storage.
typedef struct {
int size; // element count; the kernel uses it as its bounds check
float* elements; // storage; main() assigns it from malloc (host) or cudaMalloc (device)
} test_t;
// Kernel: every thread whose global index is below t->size stores that index,
// converted to float, into t->elements at the same position.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per element.
// Precondition: t is dereferenced on the device, so t itself (and
// t->elements) must be addresses that are valid in device memory.
__global__ void testCudaMethod( test_t * t ) {
    // Flat global thread index across the whole grid.
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    // Guard: the grid may contain more threads than there are elements.
    if( x < t->size ) {
        t->elements[ x ] = (float)x;
    }
}
// Host driver: allocates N floats on the host and on the device, launches
// testCudaMethod over the device buffer, copies the result back and prints it.
int main( int argc, char ** argv ) {
test_t host;
// Local variable of main(), i.e. the struct itself lives in host memory; only
// its elements member will hold a device address.
test_t device;
size_t size = sizeof( float ) * N; // buffer size in bytes
host.size = device.size = N;
host.elements = (float*)malloc( size );
CUDA_SAFE_CALL( cudaSetDevice( 0 ) );
CUDA_SAFE_CALL( cudaMalloc( (void**)&device.elements, size ) );
// Zero the device buffer so any element the kernel does not write reads back as 0.
CUDA_SAFE_CALL( cudaMemset( device.elements, 0, size ) );
// NOTE(review): &device is the address of the host-side struct declared above,
// yet the kernel dereferences t on the GPU. Pass the struct by value, or copy
// it into device memory and pass that pointer instead.
// NOTE(review): the launch itself is not error-checked (no cudaGetLastError()).
testCudaMethod<<< BLOCK_COUNT, BLOCK_SIZE>>>( &device );
// First checked CUDA call after the launch, so a fault raised inside the
// kernel is reported here rather than at the launch line.
CUDA_SAFE_CALL( cudaMemcpy( host.elements, device.elements, size, cudaMemcpyDeviceToHost ) );
CUDA_SAFE_CALL( cudaFree( device.elements ) );
// Print every element copied back from the device.
for( int i = 0; i < host.size; i++ ) {
printf( "-> %.1f\n", host.elements[ i ] );
}
free( host.elements );
return 0;
}[/codebox]
After compiling the posted code with “nvcc test.cu -o test -I~/NVIDIA_CUDA_SDK/common/inc” and running the application, I always get the following error message: “Cuda error in file ‘test.cu’ in line 34 : unspecified launch failure”. That is exactly the line where I try to copy the device memory to host memory. I think I made a stupid mistake in this sample code, but I can’t figure it out. Can you give me some advice?
Regards,
Tim
Hey guys,
I made my first tries with CUDA. Now I have a little problem with a really simple application. It should just fill an array with some numbers.
[codebox]
#include <cuda.h>
#include <cutil.h>
#include <stdio.h>
// Number of floats in the test array.
#define N 100
// Threads per block used for the kernel launch.
#define BLOCK_SIZE 8
// Blocks per grid, rounded up so the grid covers all N elements even when N is
// not a multiple of BLOCK_SIZE (a plain N / BLOCK_SIZE truncates 100 / 8 to 12
// blocks = 96 threads and leaves the last 4 elements unwritten). Parenthesised
// so the macro expands safely inside larger expressions.
#define BLOCK_COUNT ( ( N + BLOCK_SIZE - 1 ) / BLOCK_SIZE )
// Array descriptor used by both main() and the kernel: an element count plus a
// pointer to the float storage.
typedef struct {
int size; // element count; the kernel uses it as its bounds check
float* elements; // storage; main() assigns it from malloc (host) or cudaMalloc (device)
} test_t;
// Kernel: every thread whose global index is below t->size stores that index,
// converted to float, into t->elements at the same position.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per element.
// Precondition: t is dereferenced on the device, so t itself (and
// t->elements) must be addresses that are valid in device memory.
__global__ void testCudaMethod( test_t * t ) {
    // Flat global thread index across the whole grid.
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    // Guard: the grid may contain more threads than there are elements.
    if( x < t->size ) {
        t->elements[ x ] = (float)x;
    }
}
// Host driver: allocates N floats on the host and on the device, launches
// testCudaMethod over the device buffer, copies the result back and prints it.
int main( int argc, char ** argv ) {
test_t host;
// Local variable of main(), i.e. the struct itself lives in host memory; only
// its elements member will hold a device address.
test_t device;
size_t size = sizeof( float ) * N; // buffer size in bytes
host.size = device.size = N;
host.elements = (float*)malloc( size );
CUDA_SAFE_CALL( cudaSetDevice( 0 ) );
CUDA_SAFE_CALL( cudaMalloc( (void**)&device.elements, size ) );
// Zero the device buffer so any element the kernel does not write reads back as 0.
CUDA_SAFE_CALL( cudaMemset( device.elements, 0, size ) );
// NOTE(review): &device is the address of the host-side struct declared above,
// yet the kernel dereferences t on the GPU. Pass the struct by value, or copy
// it into device memory and pass that pointer instead.
// NOTE(review): the launch itself is not error-checked (no cudaGetLastError()).
testCudaMethod<<< BLOCK_COUNT, BLOCK_SIZE>>>( &device );
// First checked CUDA call after the launch, so a fault raised inside the
// kernel is reported here rather than at the launch line.
CUDA_SAFE_CALL( cudaMemcpy( host.elements, device.elements, size, cudaMemcpyDeviceToHost ) );
CUDA_SAFE_CALL( cudaFree( device.elements ) );
// Print every element copied back from the device.
for( int i = 0; i < host.size; i++ ) {
printf( "-> %.1f\n", host.elements[ i ] );
}
free( host.elements );
return 0;
}[/codebox]
After compiling the posted code with “nvcc test.cu -o test -I~/NVIDIA_CUDA_SDK/common/inc” and running the application, I always get the following error message: “Cuda error in file ‘test.cu’ in line 34 : unspecified launch failure”. That is exactly the line where I try to copy the device memory to host memory. I think I made a stupid mistake in this sample code, but I can’t figure it out. Can you give me some advice?
Regards,
Tim
Hi!
&device is actually a pointer whose value has to be interpreted as an address in host memory, since device is a variable residing in host memory. But the kernel
// Runs on the device: every thread dereferences t (t->size, t->elements).
__global__ void testCudaMethod( test_t * t ) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
if( x < t->size ) {
t->elements[ x ] = (float)x;
}
}
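will interpret the parameter t as an address in device memory and will therefore cause the device-side equivalent of a segmentation fault, which CUDA reports as the “unspecified launch failure” at the next checked call (your cudaMemcpy). To fix it, either pass the struct by value (declare the kernel as testCudaMethod( test_t t ) and launch it with device), or cudaMalloc a test_t on the device, cudaMemcpy device into it, and pass that device pointer to the kernel.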
Regards
Navier
Hey Navier,
thank you for your advice. Now it works as it should work :)
Regards,
Tim