Hi All,
I was trying to use the driver API to transfer some data from host to device.
I am using Red Hat Enterprise Linux 5 and a GeForce 8800 GT:
Major revision number: 1
Minor revision number: 1
Total amount of global memory: 536150016 bytes
Number of multiprocessors: 14
Number of cores: 112
Total amount of constant memory: 65536 bytes
Total amount of shared memory per block: 16384 bytes
Total number of registers available per block: 8192
Warp size: 32
Maximum number of threads per block: 512
Maximum sizes of each dimension of a block: 512 x 512 x 64
Maximum sizes of each dimension of a grid: 65535 x 65535 x 1
Maximum memory pitch: 262144 bytes
Texture alignment: 256 bytes
Clock rate: 1.51 GHz
Concurrent copy and execution: Yes
When I run matrixMulDrv from the CUDA SDK, it reports that the test passed.
But when I try to run the code given below, it fails with a segmentation fault.
Is this because I am doing something wrong, because I am not linking a required library, or is it a driver problem?
Any help would be appreciated.
Thanks in advance,
Tom
The driver info:
Title: NVIDIA Accelerated Graphics Driver Set for Linux-x86
Version: 169.12
Description: The NVIDIA Accelerated Graphics Driver Set for Linux-x86
provides accelerated 2D and 3D support for Linux-x86
using NVIDIA GPUs.
Keywords: OpenGL XFree86 NVIDIA
Author: NVIDIA Corporation Linux Development Team (linux-bugs ‘at’ nvidia.com)
Maintained-by: NVIDIA Corporation Linux Development Team (linux-bugs ‘at’ nvidia.com)
Original-site: http://www.nvidia.com
Platform: Linux
Copying-policy: NVIDIA Software License
#include <stdint.h>
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <cuda.h>
#include <cutil.h>
#include <cutil_inline.h>
#include <cufft.h>
int runTest(int argc, char** argv);
/*
 * Program entry point.
 *
 * Runs the host-to-device transfer test and uses its return value as the
 * process exit status, so that a failing test is visible to the caller
 * instead of always exiting with 0.
 */
int main(int argc, char **argv)
{
    return runTest(argc, argv);
}
int runTest(int argc, char **argv)
{
int cDevices;
int hcuDevice = 0;
CUresult status;
char szName[256];
CUdevprop devProps;
int* h_data ;
status = cuInit(0);
if ( CUDA_SUCCESS != status )
{
return 1;
}
status = cuDeviceGetCount( &cDevices );
if ( CUDA_SUCCESS != status )
{
return 1;
}
if ( cDevices == 0 )
{
return 1;
}
status = cuDeviceGet( &hcuDevice, 0 );
if ( CUDA_SUCCESS != status )
{
return 1;
if ( CUDA_SUCCESS == cuDeviceGetProperties( &devProps, hcuDevice ) )
{
printf(“\tsharedMemPerBlock: %d\n”, devProps.sharedMemPerBlock );
printf(“\tconstantMemory : %d\n”, devProps.totalConstantMemory );
printf(“\tregsPerBlock : %d\n”, devProps.regsPerBlock );
printf(“\tclockRate : %d\n”, devProps.clockRate );
}
CUcontext hcuContext = 0;
CUdevprop hcuDevprops;
status = cuCtxCreate( &hcuContext, 0, hcuDevice );
if ( CUDA_SUCCESS != status )
{
fprintf( stderr, "cuCtxCreate failed %d\n",status);
}
if ( CUDA_SUCCESS != cuDeviceGetProperties( &hcuDevprops, hcuDevice ) )
{
fprintf(stderr,"cuDeviceGetProperties failed!\n");
}
if((h_data=(int*)malloc(200000))==NULL)
printf("ERROR FROM MALLOC\n");
memset(h_data,0,20000);
CUdeviceptr hcuarray;
int mem_size=200000;
cuMemAlloc(&hcuarray,mem_size);
cutilDrvSafeCallNoSync(cuMemcpyHtoD(hcuarray,&h_data,200000));
while(1)
{
}
return 0;
}