CUDA version for GeForce FX 5200

I’m trying to convince a GeForce FX 5200 to work with CUDA and am having a bit of trouble - presumably because I can’t use the latest driver for Linux. It seems as though I am forced to use the driver contained in NVIDIA-Linux-x86-173.14.18-pkg1. However, when I compile a simple CUDA program with cuda 2.1, the program gets a “feature not implemented” error on the first cudaMalloc() call. I get the same results with cuda 2.0. When I try to use cuda 1.1, it spits lots of C++ related errors even though it is a C program.

Is there a particular version of cuda that I should be using with this card? If the answer is cuda 1.1, is there a version of gcc or other compile tricks that are known to work with it? I am using Fedora 9.

Results with 2.1 in emulator mode - everything seems to work:

[codebox]$ make simple

/tmp/cuda-2.1/cuda/bin/nvcc -V

nvcc: NVIDIA ® Cuda compiler driver

Copyright © 2005-2007 NVIDIA Corporation

Built on Wed_Dec__3_18:29:25_PST_2008

Cuda compilation tools, release 2.1, V0.2.1221

/tmp/cuda-2.1/cuda/bin/nvcc -deviceemu -g -c simple.o simple.cu

gcc -L/tmp/cuda-2.1/cuda/lib -Wl,-R/tmp/cuda-2.1/cuda/lib -lcudart -o simple simple.o

$ ./simple

1 + 3 = 4

2 + 4 = 6

[/codebox]

Results with 2.1 trying to use the GPU:

[codebox]$ make simple

/tmp/cuda-2.1/cuda/bin/nvcc -V

nvcc: NVIDIA ® Cuda compiler driver

Copyright © 2005-2007 NVIDIA Corporation

Built on Wed_Dec__3_18:29:25_PST_2008

Cuda compilation tools, release 2.1, V0.2.1221

/tmp/cuda-2.1/cuda/bin/nvcc -g -c simple.o simple.cu

gcc -L/tmp/cuda-2.1/cuda/lib -Wl,-R/tmp/cuda-2.1/cuda/lib -lcudart -o simple simple.o

$ ./simple

cudaMalloc a: feature is not yet implemented

[/codebox]

Results with 2.0 trying to use the GPU:

[codebox]$ make simple

/tmp/cuda-2.0/cuda/bin/nvcc -V

nvcc: NVIDIA ® Cuda compiler driver

Copyright © 2005-2007 NVIDIA Corporation

Built on Thu_Jun_19_04:48:21_PDT_2008

Cuda compilation tools, release 2.0, V0.2.1221

/tmp/cuda-2.0/cuda/bin/nvcc -g -c simple.o simple.cu

gcc -L/tmp/cuda-2.0/cuda/lib -Wl,-R/tmp/cuda-2.0/cuda/lib -lcudart -o simple simple.o

$ ./simple

cudaMalloc a: feature is not yet implemented

[/codebox]

However, 1.1 won’t even compile:

[codebox]$ make simple

/tmp/cuda-1.1/cuda/bin/nvcc -V

nvcc: NVIDIA ® Cuda compiler driver

Copyright © 2005-2006 NVIDIA Corporation

Built on Fri_Nov_30_02:31:29_PST_2007

Cuda compilation tools, release 1.1, V0.2.1221

/tmp/cuda-1.1/cuda/bin/nvcc -deviceemu -g -c simple.o simple.cu

"/usr/lib/gcc/i386-redhat-linux/4.3.0/…/…/…/…/include/c++/4.3.0/i386-redhat-linux/

bits/c++config.h", line 233: error:

      expected a "{"

namespace std __attribute__ ((__visibility__ ("default"))) {

            ^

namespace __gnu_cxx __attribute__ ((__visibility__ ("default"))) {

                  ^

Error limit reached.

100 errors detected in the compilation of “/tmp/tmpxft_0000218f_00000000-2.ii”.

Compilation terminated.

make: *** [simple.o] Error 255

[/codebox]

The source code is about as simple of an example as I can come up with that does proper error checking.

[codebox]#include <stdio.h>

#include <builtin_types.h>

/*
 * Element-wise addition of two byte arrays on the device.
 *
 * Each thread computes one element: g_res[i] = g_ia[i] + g_ib[i], where i is
 * the thread's flat global index along x. The sum is stored back into an
 * unsigned char, so it is truncated to 8 bits.
 *
 * There is no bounds check: the launch must use exactly as many threads
 * (gridDim.x * blockDim.x) as there are elements in each array.
 */
__global__ void
add(unsigned char *g_ia, unsigned char *g_ib, unsigned char *g_res)
{
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;

    g_res[i] = g_ia[i] + g_ib[i];
}

/*
 * Host driver: copies two 2-element byte vectors to the device, launches
 * add() with one thread per element, copies the result back and prints one
 * "a + b = r" line per element.
 *
 * Returns 0 on success and 1 if any CUDA call fails (the failing step and the
 * CUDA error string are written to stderr). Device buffers are released on
 * every exit path.
 */
int main(int argc, char **argv) {
    unsigned char h_ia[] = {1,2};   /* host input */
    unsigned char h_ib[] = {3,4};   /* host input */
    unsigned char h_res[2];         /* host result */
    unsigned char *d_ia = NULL;     /* device input data */
    unsigned char *d_ib = NULL;     /* device input data */
    unsigned char *d_res = NULL;    /* device result */
    unsigned int memsize = sizeof(h_ia);
    unsigned int count = sizeof(h_ia) / sizeof(h_ia[0]);
    cudaError_t err;
    int status = 1;                 /* pessimistic until everything succeeded */

    /*
     * Set up execution: one block with one thread per element. Declared up
     * front so the error-path gotos below do not jump over an initialization.
     */
    dim3 grid(1, 1, 1);
    dim3 threads(count, 1, 1);

    (void) argc;
    (void) argv;

    /*
     * Allocate space for input data and copy to device
     */
    if ((err=cudaMalloc((void**) &d_ia, memsize)) != cudaSuccess) {
            fprintf(stderr, "cudaMalloc a: %s\n", cudaGetErrorString(err));
            goto cleanup;
    }
    if ((err=cudaMemcpy(d_ia, h_ia, memsize, cudaMemcpyHostToDevice)) !=
        cudaSuccess) {
            fprintf(stderr, "cudaMemcpy a: %s\n", cudaGetErrorString(err));
            goto cleanup;
    }

    /* err must be assigned here, otherwise the message reports a stale code */
    if ((err=cudaMalloc((void**) &d_ib, memsize)) != cudaSuccess) {
            fprintf(stderr, "cudaMalloc b: %s\n", cudaGetErrorString(err));
            goto cleanup;
    }
    if ((err=cudaMemcpy(d_ib, h_ib, memsize, cudaMemcpyHostToDevice)) !=
        cudaSuccess) {
            fprintf(stderr, "cudaMemcpy b: %s\n", cudaGetErrorString(err));
            goto cleanup;
    }

    /*
     * Allocate space for output data
     */
    if ((err=cudaMalloc((void**) &d_res, memsize)) != cudaSuccess) {
            fprintf(stderr, "cudaMalloc r: %s\n", cudaGetErrorString(err));
            goto cleanup;
    }

    /*
     * Execute. The kernel uses no shared memory, so no dynamic shared memory
     * is requested. A launch does not return an error code itself; launch
     * failures are picked up with cudaGetLastError().
     */
    add<<< grid, threads >>>(d_ia, d_ib, d_res);
    if ((err=cudaGetLastError()) != cudaSuccess) {
            fprintf(stderr, "add kernel launch: %s\n",
                cudaGetErrorString(err));
            goto cleanup;
    }

    /*
     * Copy result from device
     */
    if ((err=cudaMemcpy(h_res, d_res, memsize, cudaMemcpyDeviceToHost))
        != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy r (d->h): %s\n",
                cudaGetErrorString(err));
            goto cleanup;
    }

    for (unsigned int i=0; i<count; i++) {
            printf("%d + %d = %d\n", h_ia[i], h_ib[i], h_res[i]);
    }
    status = 0;

cleanup:
    /* Pointers start out NULL, so only buffers that were allocated are freed */
    if (d_res != NULL) {
            cudaFree(d_res);
    }
    if (d_ib != NULL) {
            cudaFree(d_ib);
    }
    if (d_ia != NULL) {
            cudaFree(d_ia);
    }

    return(status);
}

[/codebox]

The FX 5200 does not support CUDA.

Does the e-GeForce 6200 support CUDA?

No.