Problem with double datatype and 285GTX

I have a new GTX 285. Now I would like to test the double datatype.

I have created a simple test kernel. This kernel multiplies a one-dimensional vector by a constant factor, but the result is zero. If I use the float datatype instead, the result is correct.

I use CUDA 2.1 Software.

Here is my code:

// includes, system

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

#include <math.h>

// includes, project

#include <cufft.h>

#include <cutil_inline.h>

// Forward declaration of the scaling kernel (defined at the end of the file).
// Arguments: input vector, output vector, scale factor, element count.
static __global__ void absolute_value(double*, double*, double, unsigned int);


// declaration, forward

void runTest(int argc, char** argv);

// Number of elements in the test signal. The expression is parenthesized so
// the macro expands safely inside larger expressions (e.g. x / SIGNAL_SIZE).
#define SIGNAL_SIZE (512*256)

// Constant factor every signal element is multiplied by in the kernel.
#define coeff 0.0135


// Program main


// Entry point: runs the double-precision test, then hands control to the
// SDK's cutilExit helper.
int main(int argc, char** argv)
{
    runTest(argc, argv);

    cutilExit(argc, argv);
}



//! Run a simple test for CUDA


// Fills a host vector with 3.0, multiplies it by `coeff` on the GPU using the
// absolute_value kernel, and prints the first ten inputs and results.
//
// NOTE: the kernel works on `double`, so this file must be compiled for a
// double-capable target (e.g. `nvcc -arch sm_13`).
void runTest(int argc, char** argv)
{
    // Use the device named on the command line if there is one, otherwise
    // pick the fastest device available.
    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
        cutilDeviceInit(argc, argv);
    else
        cutilSafeCall(cudaSetDevice( cutGetMaxGflopsDeviceId() ));

    // Copy the macro values into typed locals so later arithmetic does not
    // depend on how the macros are parenthesized.
    const unsigned int n       = SIGNAL_SIZE;
    const unsigned int threads = 512;
    const unsigned int blocks  = (n + threads - 1) / threads;  // ceil-div
    const size_t mem_size      = sizeof(double) * n;

    // Allocate host memory for the signal and for the result
    double* h_signal_x = (double*)malloc(mem_size);
    double* h_results  = (double*)malloc(mem_size);
    if( h_signal_x == NULL || h_results == NULL )
    {
        fprintf(stderr, "Host memory allocation failed\n");
        free(h_signal_x);
        free(h_results);
        exit(EXIT_FAILURE);
    }

    // Initialize the memory for the signal
    for(unsigned int i = 0; i < n; ++i)
    {
        h_signal_x[i] = 3.0;
    }

    for(unsigned int i = 0; i < 10; ++i)
    {
        printf("Wert: %f \n", h_signal_x[i]);
    }

    // Allocate device memory for input signal
    double* d_in;
    cutilSafeCall(cudaMalloc((void**)&d_in, mem_size));

    // Copy host memory to device
    cutilSafeCall(cudaMemcpy(d_in, h_signal_x, mem_size, cudaMemcpyHostToDevice));

    // Allocate device memory for result
    double* d_out;
    cutilSafeCall(cudaMalloc((void**)&d_out, mem_size));

    // Scale every element of the signal by coeff on the device
    absolute_value<<<blocks, threads>>>(d_in, d_out, coeff, n);

    // Check if kernel execution generated an error
    cutilCheckMsg("Kernel execution failed [ absolute_value ]");

    // Copy device memory to host
    cutilSafeCall(cudaMemcpy(h_results, d_out, mem_size, cudaMemcpyDeviceToHost));

    // Display the result
    for (unsigned int i = 0; i < 10; ++i)
    {
        printf(" Ergebnis: %2.20f \n", h_results[i]);
    }

    // cleanup memory
    free(h_signal_x);
    free(h_results);
    cutilSafeCall(cudaFree(d_in));
    cutilSafeCall(cudaFree(d_out));
}





// Scaling kernel: d_out[i] = d_in[i] * y for every i < length.
// (Despite its name, the kernel does not take an absolute value.)
//
// Launch layout: 1D grid of 1D blocks, one thread per element; threads whose
// global index falls past `length` do nothing.
// Requires a double-capable target (compile with -arch sm_13 or newer).
__global__ void absolute_value(double *d_in, double *d_out, double y, unsigned int length)
{
    // Unsigned index so the comparison with `length` is not signed/unsigned.
    unsigned int tid = threadIdx.x + blockIdx.x*blockDim.x;

    // Only threads that map to a valid element may touch memory.
    if( tid < length )
    {
        d_out[tid] = d_in[tid] * y;
    }
}


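Compile with -arch sm_13. Without that flag nvcc targets compute capability 1.0, which has no double-precision support, so every double in device code is demoted to float while the host still copies 8-byte doubles; the kernel then misreads the data, which is why the result is wrong.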

Tomorrow I will test it.

These are my compiler parameters. How must I modify these options?

"$(CUDA_BIN_PATH)\nvcc.exe" -ccbin "$(VCInstallDir)bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I"$(CUDA_INC_PATH)" -I./ -I../../common/inc -o $(ConfigurationName)\simpleCUFFT.obj

1. Is my modification OK?

"$(CUDA_BIN_PATH)\nvcc.exe" -arch sm_13 -ccbin "$(VCInstallDir)bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I"$(CUDA_INC_PATH)" -I./ -I../../common/inc -o $(ConfigurationName)\simpleCUFFT.obj