Problem in cuComplex.h (solution added): should it use cmath instead of math.h?

I am currently developing an application that calls a cuda library.

My application is compiled with g++ 4.3 and needs to know about the cuda datatypes.

When compiling it creates the following error:

Making dependency list for source file src/main.cpp

SOURCE=src/main.cpp ;  g++ -m64 -Dlinux64 -DWM_DP  -Wall -Wextra -Wno-unused-parameter -Wnon-virtual-dtor -O3 -fno-strict-aliasing  -DNoRepository -ftemplate-depth-40 -I/home/jkastrup/Engys/OpenFOAM-1.6_engysEdition-2.0.beta/src/finiteVolume/lnInclude/     -I../foam_cuda_lib/include/     -Iinclude/     -I/usr/local/cuda/include/ -IlnInclude -I. -I/home/jkastrup/Engys/OpenFOAM-1.6_engysEdition-2.0.beta/src/OpenFOAM/lnInclude -I/home/jkastrup/Engys/OpenFOAM-1.6_engysEdition-2.0.beta/src/OSspecific/POSIX/lnInclude   -fPIC -c $SOURCE -o Make/linux64GccDPOpt/main.o

In file included from /usr/local/cuda/include/cusp/complex.h:71,

                 from /usr/local/cuda/include/cusp/blas.h:26,

                 from /usr/local/cuda/include/cusp/detail/device/conversion.h:20,

                 from /usr/local/cuda/include/cusp/detail/device/convert.h:23,

                 from /usr/local/cuda/include/cusp/detail/dispatch/convert.h:21,

                 from /usr/local/cuda/include/cusp/detail/convert.inl:17,

                 from /usr/local/cuda/include/cusp/convert.h:47,

                 from /usr/local/cuda/include/cusp/detail/coo_matrix.inl:17,

                 from /usr/local/cuda/include/cusp/coo_matrix.h:368,

                 from ../foam_cuda_lib/include/foam_cuda_lib_solver.h:5,

                 from src/main.cpp:9:

/usr/local/cuda/include/cuComplex.h: In function ‘double cuCabs(cuDoubleComplex)’:

/usr/local/cuda/include/cuComplex.h:273: error: call of overloaded ‘sqrt(double&)’ is ambiguous

/usr/include/bits/mathcalls.h:157: note: candidates are: double sqrt(double)

/home/jkastrup/Engys/OpenFOAM-1.6_engysEdition-2.0.beta/src/OpenFOAM/lnInclude/dimensionedScalar.H:65: note:                 Foam::dimensionedScalar Foam::sqrt(const Foam::dimensionedScalar&)

/home/jkastrup/Engys/OpenFOAM-1.6_engysEdition-2.0.beta/src/OpenFOAM/lnInclude/Scalar.H:233: note:                 Foam::doubleScalar Foam::sqrt(Foam::doubleScalar)

/home/jkastrup/Engys/OpenFOAM-1.6_engysEdition-2.0.beta/src/OpenFOAM/lnInclude/Scalar.H:233: note:                 Foam::floatScalar Foam::sqrt(Foam::floatScalar)

make: *** [Make/linux64GccDPOpt/main.o] Error 1

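This in turn comes down to a problem with the sqrt function that cuComplex.h brings in through math.h: the unqualified call to sqrt becomes ambiguous once OpenFOAM's own sqrt overloads are visible.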

A solution is to edit cuComplex.h by changing the include directive from #include <math.h> to #include <cmath> and adding a "using std::sqrt;" declaration to the function that calls sqrt.

I have attached my edited version, with a comment (// EDITED BY J. KASTRUP) on each of the three lines I changed in order to make it work.

Can this fix be added to the next update?

NOTE: I have not tested the cuComplex functions after the edit since I’m not using them.

Other info:

OS: Ubuntu 10.04 (64 bit)

CUDA toolkit: 4.0

Driver: Dev driver for linux 270.41.19

Cusp: v0.2.0.

/*

 * Copyright 1993-2011 NVIDIA Corporation.  All rights reserved.

 *

 * NOTICE TO LICENSEE:

 *

 * This source code and/or documentation ("Licensed Deliverables") are

 * subject to NVIDIA intellectual property rights under U.S. and

 * international Copyright laws.

 *

 * These Licensed Deliverables contained herein is PROPRIETARY and

 * CONFIDENTIAL to NVIDIA and is being provided under the terms and

 * conditions of a form of NVIDIA software license agreement by and

 * between NVIDIA and Licensee ("License Agreement") or electronically

 * accepted by Licensee.  Notwithstanding any terms or conditions to

 * the contrary in the License Agreement, reproduction or disclosure

 * of the Licensed Deliverables to any third party without the express

 * written consent of NVIDIA is prohibited.

 *

 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE

 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE

 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS

 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.

 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED

 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,

 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.

 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE

 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY

 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY

 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,

 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS

 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE

 * OF THESE LICENSED DELIVERABLES.

 *

 * U.S. Government End Users.  These Licensed Deliverables are a

 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT

 * 1995), consisting of "commercial computer software" and "commercial

 * computer software documentation" as such terms are used in 48

 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government

 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and

 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all

 * U.S. Government End Users acquire the Licensed Deliverables with

 * only those rights set forth herein.

 *

 * Any use of the Licensed Deliverables in individual and commercial

 * software must include, in the user documentation and internal

 * comments to the code, the above Disclaimer and U.S. Government End

 * Users Notice.

 */

#if !defined(CU_COMPLEX_H_)

#define CU_COMPLEX_H_

/*
 * The math headers are included BEFORE the extern "C" block is opened:
 * a C++ standard library header declares overloads and templates, which
 * must not appear inside a C linkage specification.
 */
#include <math.h>   /* fabsf, sqrtf, fabs, sqrt in the global namespace */
#if defined(__cplusplus)
#include <cmath>    /* std::sqrt, named by cuCabs */  // EDITED BY J. KASTRUP
#endif /* __cplusplus */

#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */

#include "vector_types.h"

/* Single-precision complex number stored in a float2: .x holds the real
 * part and .y the imaginary part (see cuCrealf / cuCimagf). */
typedef float2 cuFloatComplex;

/* Returns the real part of a single-precision complex value, which is
 * stored in its .x member. */
__host__ __device__ static __inline__ float cuCrealf (cuFloatComplex x)
{
    const float real_part = x.x;
    return real_part;
}

/* Returns the imaginary part of a single-precision complex value, which is
 * stored in its .y member. */
__host__ __device__ static __inline__ float cuCimagf (cuFloatComplex x)
{
    const float imag_part = x.y;
    return imag_part;
}

/* Builds a single-precision complex value from its two components:
 * r becomes the real part (.x) and i the imaginary part (.y). */
__host__ __device__ static __inline__ cuFloatComplex make_cuFloatComplex
                                                             (float r, float i)
{
    cuFloatComplex packed;

    packed.y = i;
    packed.x = r;
    return packed;
}

/* Complex conjugate: keeps the real part and negates the imaginary part. */
__host__ __device__ static __inline__ cuFloatComplex cuConjf (cuFloatComplex x)
{
    const float kept_re = cuCrealf(x);
    const float flipped_im = -cuCimagf(x);

    return make_cuFloatComplex (kept_re, flipped_im);
}

/* Complex addition x + y, performed component by component. */
__host__ __device__ static __inline__ cuFloatComplex cuCaddf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    const float sum_re = cuCrealf(x) + cuCrealf(y);
    const float sum_im = cuCimagf(x) + cuCimagf(y);

    return make_cuFloatComplex (sum_re, sum_im);
}

/* Complex subtraction x - y, performed component by component. */
__host__ __device__ static __inline__ cuFloatComplex cuCsubf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    const float diff_re = cuCrealf(x) - cuCrealf(y);
    const float diff_im = cuCimagf(x) - cuCimagf(y);

    return make_cuFloatComplex (diff_re, diff_im);
}

/* Complex multiplication x * y using the textbook formula
 *   (xr*yr - xi*yi) + i*(xr*yi + xi*yr).
 * The intermediate products are not scaled, so they can overflow even when
 * the final result would be representable. Other implementations commonly
 * skip that guard as well (presumably for speed), and this one follows suit
 * to stay competitive.
 */
__host__ __device__ static __inline__ cuFloatComplex cuCmulf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    const float xr = cuCrealf(x);
    const float xi = cuCimagf(x);
    const float yr = cuCrealf(y);
    const float yi = cuCimagf(y);

    return make_cuFloatComplex ((xr * yr) - (xi * yi),
                                (xr * yi) + (xi * yr));
}

/* Complex division x / y.
 * Both operands are first multiplied by 1 / (|yr| + |yi|) so that the
 * squared denominator terms neither underflow nor overflow prematurely.
 * Guarded implementations like this are usually the default in complex
 * libraries; some additionally offer a faster, unguarded variant.
 * No check is made for y == 0.
 */
__host__ __device__ static __inline__ cuFloatComplex cuCdivf (cuFloatComplex x,
                                                              cuFloatComplex y)
{
    /* Scale factor derived from the denominator's component magnitudes. */
    const float scale = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y));
    const float inv_scale = 1.0f / scale;

    /* Scaled numerator (x) and denominator (y) components. */
    const float num_re = cuCrealf(x) * inv_scale;
    const float num_im = cuCimagf(x) * inv_scale;
    const float den_re = cuCrealf(y) * inv_scale;
    const float den_im = cuCimagf(y) * inv_scale;

    /* Squared magnitude of the scaled denominator and its reciprocal. */
    const float den_norm = (den_re * den_re) + (den_im * den_im);
    const float inv_norm = 1.0f / den_norm;

    return make_cuFloatComplex (((num_re * den_re) + (num_im * den_im)) * inv_norm,
                                ((num_im * den_re) - (num_re * den_im)) * inv_norm);
}

/*
 * Magnitude |x| of a single-precision complex value.
 * hypotf() would be the natural choice, but it is not available on every
 * platform. This hand-written version instead factors out the larger
 * component magnitude, computing larger * sqrt(1 + (smaller / larger)^2), so
 * the squares cannot underflow or overflow prematurely (which would cost
 * half the exponent range). It is the simplest and fastest of the guarded
 * schemes, but may be inaccurate if sqrt and division are not IEEE
 * compliant.
 */
__host__ __device__ static __inline__ float cuCabsf (cuFloatComplex x)
{
    const float re_mag = fabsf(cuCrealf(x));
    const float im_mag = fabsf(cuCimagf(x));
    const float larger = (re_mag > im_mag) ? re_mag : im_mag;
    const float smaller = (re_mag > im_mag) ? im_mag : re_mag;
    float ratio;
    float magnitude;

    ratio = smaller / larger;
    ratio = 1.0f + ratio * ratio;
    magnitude = larger * sqrtf(ratio);

    /* A zero or infinite operand makes the scaled formula unusable (the
     * literal is the largest finite float, so the comparisons hold only for
     * an infinity); the plain sum is returned in those cases. */
    if ((larger == 0.0f) ||
        (larger > 3.402823466e38f) || (smaller > 3.402823466e38f)) {
        magnitude = larger + smaller;
    }
    return magnitude;
}

/* Double precision */
/* Double-precision complex number stored in a double2: .x holds the real
 * part and .y the imaginary part (see cuCreal / cuCimag). */
typedef double2 cuDoubleComplex;

/* Returns the real part of a double-precision complex value, which is
 * stored in its .x member. */
__host__ __device__ static __inline__ double cuCreal (cuDoubleComplex x)
{
    const double real_part = x.x;
    return real_part;
}

/* Returns the imaginary part of a double-precision complex value, which is
 * stored in its .y member. */
__host__ __device__ static __inline__ double cuCimag (cuDoubleComplex x)
{
    const double imag_part = x.y;
    return imag_part;
}

/* Builds a double-precision complex value from its two components:
 * r becomes the real part (.x) and i the imaginary part (.y). */
__host__ __device__ static __inline__ cuDoubleComplex make_cuDoubleComplex
                                                           (double r, double i)
{
    cuDoubleComplex packed;

    packed.y = i;
    packed.x = r;
    return packed;
}

/* Complex conjugate: keeps the real part and negates the imaginary part. */
__host__ __device__ static __inline__ cuDoubleComplex cuConj(cuDoubleComplex x)
{
    const double kept_re = cuCreal(x);
    const double flipped_im = -cuCimag(x);

    return make_cuDoubleComplex (kept_re, flipped_im);
}

/* Complex addition x + y, performed component by component. */
__host__ __device__ static __inline__ cuDoubleComplex cuCadd(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    const double sum_re = cuCreal(x) + cuCreal(y);
    const double sum_im = cuCimag(x) + cuCimag(y);

    return make_cuDoubleComplex (sum_re, sum_im);
}

/* Complex subtraction x - y, performed component by component. */
__host__ __device__ static __inline__ cuDoubleComplex cuCsub(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    const double diff_re = cuCreal(x) - cuCreal(y);
    const double diff_im = cuCimag(x) - cuCimag(y);

    return make_cuDoubleComplex (diff_re, diff_im);
}

/* Complex multiplication x * y using the textbook formula
 *   (xr*yr - xi*yi) + i*(xr*yi + xi*yr).
 * The intermediate products are not scaled, so they can overflow even when
 * the final result would be representable. Other implementations commonly
 * skip that guard as well (presumably for speed), and this one follows suit
 * to stay competitive.
 */
__host__ __device__ static __inline__ cuDoubleComplex cuCmul(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    const double xr = cuCreal(x);
    const double xi = cuCimag(x);
    const double yr = cuCreal(y);
    const double yi = cuCimag(y);

    return make_cuDoubleComplex ((xr * yr) - (xi * yi),
                                 (xr * yi) + (xi * yr));
}

/* Complex division x / y.
 * Both operands are first multiplied by 1 / (|yr| + |yi|) so that the
 * squared denominator terms neither underflow nor overflow prematurely.
 * Guarded implementations like this are usually the default in complex
 * libraries; some additionally offer a faster, unguarded variant.
 * No check is made for y == 0.
 */
__host__ __device__ static __inline__ cuDoubleComplex cuCdiv(cuDoubleComplex x,
                                                             cuDoubleComplex y)
{
    /* Scale factor derived from the denominator's component magnitudes. */
    const double scale = (fabs(cuCreal(y))) + (fabs(cuCimag(y)));
    const double inv_scale = 1.0 / scale;

    /* Scaled numerator (x) and denominator (y) components. */
    const double num_re = cuCreal(x) * inv_scale;
    const double num_im = cuCimag(x) * inv_scale;
    const double den_re = cuCreal(y) * inv_scale;
    const double den_im = cuCimag(y) * inv_scale;

    /* Squared magnitude of the scaled denominator and its reciprocal. */
    const double den_norm = (den_re * den_re) + (den_im * den_im);
    const double inv_norm = 1.0 / den_norm;

    return make_cuDoubleComplex (((num_re * den_re) + (num_im * den_im)) * inv_norm,
                                 ((num_im * den_re) - (num_re * den_im)) * inv_norm);
}

/*
 * Magnitude |x| of a double-precision complex value.
 *
 * The larger component magnitude v is factored out and the result is
 * computed as v * sqrt(1 + (w / v)^2), which guards against intermediate
 * underflow and overflow; otherwise half the exponent range would be lost.
 * There are various ways of doing guarded computation. This is the simplest
 * and fastest one, but it may suffer from inaccuracies if sqrt and division
 * are not IEEE compliant.
 *
 * Returns v + w instead when v is zero or either magnitude is infinite.
 */
__host__ __device__ static __inline__ double cuCabs (cuDoubleComplex x)
{
#if defined(__cplusplus)
    /* A block-scope using-declaration hides every other sqrt that a client
     * made visible at namespace scope (e.g. through a using-directive), so
     * the call below cannot become ambiguous. It is C++-only syntax and is
     * therefore compiled out for C translation units, which call the plain
     * sqrt from math.h. */
    using std::sqrt;                      // EDITED BY J. KASTRUP
#endif /* __cplusplus */
    double a = cuCreal(x);
    double b = cuCimag(x);
    double v, w, t;

    a = fabs(a);
    b = fabs(b);
    /* v = larger magnitude, w = smaller magnitude */
    if (a > b) {
        v = a;
        w = b;
    } else {
        v = b;
        w = a;
    }
    t = w / v;
    t = 1.0 + t * t;
    t = v * sqrt(t);
    /* A zero or infinite operand makes the scaled formula unusable (the
     * literal is the largest finite double, so the comparisons hold only
     * for an infinity); fall back to the plain sum. */
    if ((v == 0.0) ||
        (v > 1.79769313486231570e+308) || (w > 1.79769313486231570e+308)) {
        t = v + w;
    }
    return t;
}

#if defined(__cplusplus)

}

#endif /* __cplusplus */

/* aliases */
/* cuComplex is another name for the single-precision cuFloatComplex. */
typedef cuFloatComplex cuComplex;

/* Builds a cuComplex from a real part x and an imaginary part y by
 * forwarding to make_cuFloatComplex. */
__host__ __device__ static __inline__ cuComplex make_cuComplex (float x,
                                                                float y)
{
    const cuComplex value = make_cuFloatComplex (x, y);
    return value;
}

/* Float-to-double promotion: converts each component of a single-precision
 * complex value to double and returns the double-precision complex value. */
__host__ __device__ static __inline__ cuDoubleComplex cuComplexFloatToDouble
                                                      (cuFloatComplex c)
{
    const double wide_re = (double)cuCrealf(c);
    const double wide_im = (double)cuCimagf(c);

    return make_cuDoubleComplex (wide_re, wide_im);
}

/* Double-to-float demotion: converts each component of a double-precision
 * complex value to float and returns the single-precision complex value. */
__host__ __device__ static __inline__ cuFloatComplex cuComplexDoubleToFloat
                                                     (cuDoubleComplex c)
{
    const float narrow_re = (float)cuCreal(c);
    const float narrow_im = (float)cuCimag(c);

    return make_cuFloatComplex (narrow_re, narrow_im);
}

/* Complex multiply-add: returns x * y + d in single precision.
 * Each component is accumulated in two steps, every step being one product
 * added to a running value (presumably so that each step can map onto a
 * fused multiply-add; confirm against the target compiler). */
__host__ __device__ static __inline__  cuComplex cuCfmaf( cuComplex x, cuComplex y, cuComplex d)
{
    /* First step: real(x) times each component of y, plus d. */
    float re_acc = (cuCrealf(x) *  cuCrealf(y)) + cuCrealf(d);
    float im_acc = (cuCrealf(x) *  cuCimagf(y)) + cuCimagf(d);

    /* Second step: fold in the products that involve imag(x). */
    re_acc = -(cuCimagf(x) * cuCimagf(y))  + re_acc;
    im_acc =  (cuCimagf(x) *  cuCrealf(y)) + im_acc;

    return make_cuComplex(re_acc, im_acc);
}

/* Complex multiply-add: returns x * y + d in double precision.
 * Each component is accumulated in two steps, every step being one product
 * added to a running value (presumably so that each step can map onto a
 * fused multiply-add; confirm against the target compiler). */
__host__ __device__ static __inline__  cuDoubleComplex cuCfma( cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex d)
{
    /* First step: real(x) times each component of y, plus d. */
    double re_acc = (cuCreal(x) *  cuCreal(y)) + cuCreal(d);
    double im_acc = (cuCreal(x) *  cuCimag(y)) + cuCimag(d);

    /* Second step: fold in the products that involve imag(x). */
    re_acc = -(cuCimag(x) * cuCimag(y))  + re_acc;
    im_acc =  (cuCimag(x) *  cuCreal(y)) + im_acc;

    return make_cuDoubleComplex(re_acc, im_acc);
}

#endif /* !defined(CU_COMPLEX_H_) */

cuComplex.h (11.5 KB)