GTX280 cannot support double: my newest test

My GPU is a GTX280 with revision number (compute capability) 1.3. NVIDIA says it should support double precision, but my
tests show that it does not. Can anyone give me a suggestion, or can someone at NVIDIA
give me an explanation?

my email: zhangyuaniecas@gmail.com

Program 1) shows that the GTX280 cannot support double.
Program 2) shows that the CUDA 2.0 cos/sin/tan functions cannot support double.

//--------------------------double test------------------------------------------------
#include <stdio.h>
#include <cutil.h>

// Element type under test. Swap the two definitions below to run the same
// kernel in single precision for comparison.
// NOTE: double precision in device code needs compute capability 1.3 AND has
// to be enabled at build time (nvcc -arch sm_13 / -arch compute_13). Without
// that flag the toolchain targets an older architecture and device-side
// doubles are not real 64-bit values, so the host reads back garbage.
#define VARTYPE double
//#define VARTYPE float

/**
 * Writes each thread's flat index within its block into data[].
 *
 * Launch layout: a single block of blockDim.x * blockDim.y threads.
 * Precondition: data points to device memory holding at least
 * blockDim.x * blockDim.y elements; the kernel takes no element count and
 * therefore performs no bounds check.
 *
 * @param data device output buffer; data[i] receives the value i
 */
__global__ void test(VARTYPE *data) {
    // Row-major index inside the block: a row is blockDim.x threads wide.
    // (The grid width, gridDim.x, is unrelated to the block's row length.)
    int id = threadIdx.x + threadIdx.y * blockDim.x;
    data[id] = id;
}

// Number of elements in the test buffer; also the thread count of the single
// block that is launched (one thread per element).
#define NB 16

/**
 * Host driver for the double-precision smoke test.
 *
 * Allocates NB elements on the device, lets the `test` kernel fill them with
 * their own indices, copies them back and prints "index value" pairs. A
 * correct run prints 0.000000 .. 15.000000 in order.
 */
int main() {
    dim3 blockSize(NB, 1);

    VARTYPE *dbg = NULL;  // device buffer written by the kernel
    VARTYPE res[NB];      // host-side copy of the result
    CUDA_SAFE_CALL(cudaMalloc((void **)&dbg, sizeof(*dbg) * NB));
    test<<<1, blockSize>>>(dbg);
    CUT_CHECK_ERROR("1. kernel");
    // Blocking device-to-host copy issued after the kernel launch.
    CUDA_SAFE_CALL(cudaMemcpy(res, dbg, sizeof(*res) * NB, cudaMemcpyDeviceToHost));
    //CUDA_SAFE_CALL(cudaThreadSynchronize());
    for (int i = 0; i != NB; i++) printf("%3d %f\n", i, res[i]);

    CUDA_SAFE_CALL(cudaFree(dbg));
    return 0;
}

//--------------------------------------------------------------------------

//---------------------------cos test------------------------------------------------
#include<stdio.h>
#include<cutil.h>
#include<unistd.h>
// Floating-point type exercised by the cos test. Swap the two typedefs to
// switch the whole program between single and double precision.
typedef float FP_t;
//typedef double FP_t;

// Host-side test parameters (plain globals, read by main).
int  line        = 10;    // elements per batch row; passed to do_cos as N
int  print_count = 20;    // how many result lines main prints
int  BATCH       = 10;    // number of rows; passed to do_cos as nbatch
int  memsz       = 0;     // buffer size in bytes; computed in main
FP_t test_num    = 1.56;  // input value every element is initialised to
/**
 * Replaces every element of a batched array with its cosine, in place.
 *
 * Data layout: `in` holds nbatch consecutive rows of N elements each.
 * Work distribution: blocks walk the rows starting at blockIdx.x in steps of
 * gridDim.x; within a row, threads walk the elements starting at threadIdx.x
 * in steps of blockDim.x. Both loops are bounded by nbatch / N, so every
 * element is visited exactly once for any 1D launch configuration.
 *
 * @param in      device pointer to nbatch * N elements (read and written)
 * @param N       number of elements per row
 * @param nbatch  number of rows
 */
static __global__ void do_cos(FP_t* in,int N,int nbatch)
{
    for (int bid = blockIdx.x; bid < nbatch; bid += gridDim.x)
    {
        // Start of the row handled by this block in the current iteration.
        FP_t* tmp = in + bid * N;
        for (int tid = threadIdx.x; tid < N; tid += blockDim.x)
        {
            tmp[tid] = cos(tmp[tid]);
        }
    }
}

/**
 * Host driver for the cos() test.
 *
 * Fills a line * BATCH element buffer with test_num, prints the CPU value of
 * cos(test_num) print_count times, runs do_cos on the device over the same
 * data and prints the first print_count device results so the two columns
 * can be compared by eye.
 */
int main(int argc, char **argv)
{
    CUT_DEVICE_INIT(argc, argv);

    FP_t* d_ptr_in;  // device buffer
    FP_t* h_ptr;     // host buffer allocated with cudaMallocHost
    memsz = sizeof(FP_t) * line * BATCH;
    CUDA_SAFE_CALL(cudaMalloc((void **)&(d_ptr_in), (memsz)));
    CUDA_SAFE_CALL(cudaMallocHost((void **)&(h_ptr), (memsz)));

    for (int i = 0; i < line * BATCH; i++) h_ptr[i] = test_num;
    // Every element holds the same input, so the reference value is printed
    // from element 0 each time.
    for (int i = 0; i < print_count; i++) printf("cpu:%20.10f\n", cos(h_ptr[0]));
    CUDA_SAFE_CALL(cudaMemcpy((d_ptr_in), (h_ptr), (memsz), cudaMemcpyHostToDevice));

    do_cos<<<20, 128>>>(d_ptr_in, line, BATCH);

    CUT_CHECK_ERROR("kernel failed");
    CUDA_SAFE_CALL(cudaMemcpy((h_ptr), (d_ptr_in), (memsz), cudaMemcpyDeviceToHost));

    for (int i = 0; i < print_count; i++) printf("gpu:%20.10f\n", h_ptr[i]);

    // Release both buffers before leaving; the original leaked them.
    CUDA_SAFE_CALL(cudaFree(d_ptr_in));
    CUDA_SAFE_CALL(cudaFreeHost(h_ptr));

    CUT_EXIT(argc, argv);
}
//-----------------------------------------------------------------------------------------

Maybe setting the nvcc compiler option for compute capability 1.3 is required?

Can anyone at NVIDIA give me a suggestion? If there is an nvcc compiler option for this, please tell us. The card cost me a lot of money, but it has disappointed me.

According to the CUDA 2.0 beta 2 announcement, you must compile with "-arch compute_13" to enable compute capability 1.3.