Compilation problems when replacing nvcc with pgc++

I am compiling the code listed below with:

nvcc -c code.cu

When I change the compiler to:
pgc++ -c code.cu
I get the following error:

Warning: --mp could conflict with --gnu pthreads support

"/opt/pgi/linux86-64/2014/cuda/5.5/include/host_config.h", line 119: catastrophic error:
cannot open source file "features.h"
#include <features.h> /* for __THROW */
^

1 catastrophic error detected in the compilation of "ext_lib.cu".
Compilation terminated.


Thanks,
Barak

#include <stdio.h>
#include <iostream>
#include <vector>
#include <math.h>
// #include "/usr/local/cuda-6.5/targets/x86_64-linux/include/cuda_runtime_api.h"
#include "/usr/local/cuda-6.5/include/cuda_runtime_api.h"

template <typename T>
__global__ void vec_inc(T *vecIn, T *vecOut, int len)
{
int i = (blockIdx.x * blockDim.x) + threadIdx.x;
if( i < len ) {
vecOut[i] = vecIn[i] + 1;
//printf("incrementing kernel: blockIdx = %d, threadIdx = %d, calculated element %d as %f (%f)\n", blockIdx.x, threadIdx.x, i, vecOut[i], vecIn[i]);
}
}

template <typename T>
__global__ void vec_sqr(T *vecIn, T *vecOut, int len)
{
int i = (blockIdx.x * blockDim.x) + threadIdx.x;
if( i < len ) {
vecOut[i] = vecIn[i] * vecIn[i];
//printf("squaring kernel: blockIdx = %d, threadIdx = %d, calculated element %d as %f (%f)\n", blockIdx.x, threadIdx.x, i, vecOut[i], vecIn[i]);
}
}
template <typename T>
class VectorOps
{
public:
VectorOps() {};
VectorOps(int len)
{
setLength(len);
}

void setLength(int len)
{
_len = len;
std::cout << std::dec << " VectorOps::setLength():: length set to: " << _len << std::endl;
}

VectorOps& operator <<(T *);
VectorOps& operator >>(T *);
void inc();
void sqr();

protected:
int _len;
std::vector<T> _data;
T *_d_data;
};

template <typename T>
VectorOps<T>& VectorOps<T>::operator <<(T *input_seq)
{
for(int i = 0; i < _len; i++) {
_data.push_back(input_seq[i]);
}

cudaMalloc(&_d_data, _len * sizeof(T));
cudaMemcpy(_d_data, input_seq, _len * sizeof(T), cudaMemcpyHostToDevice);

std::cout << " input sequnce received" << std::endl;

return this;
}

template <typename T>
VectorOps<T>& VectorOps<T>::operator >>(T *output_seq)
{
//for(int i = 0; i < _len; i++) {
// output_seq[i] = _data[i];
//}

cudaMemcpy(output_seq, _d_data, _len * sizeof(T), cudaMemcpyDeviceToHost);
cudaFree(_d_data);
_d_data = 0;
std::cout << " output sequnce transmitted" << std::endl;

return *this;
}
template <typename T>
void VectorOps<T>::inc()
{
//for(int i = 0; i < _len; i++) {
//_data[i]++;
//}
int blocks, threads;

threads = 64;
blocks = (_len + threads - 1) / threads;

std::cout << "calling kernel with blocks = " << blocks << " threads = " << threads << " input length = " << _len << std::endl;
vec_inc<<<blocks, threads>>>(_d_data, _d_data, _len);
std::cout << " stored sequence incremented" << std::endl;
}

template <typename T>
void VectorOps<T>::sqr()
{
//for(int i = 0; i < _len; i++) {
//_data[i] = pow(_data[i], 2);
//}

int blocks, threads;

threads = 64;
blocks = (_len + threads - 1) / threads;

std::cout << "calling kernel with blocks = " << blocks << " threads = " << threads << " input legth = " << _len << std::endl;

vec_sqr<<<blocks, threads>>>(_d_data, d_data, len);

std::cout << " stored sequence squared" << std::endl;
}
template <typename T>
void external_processing_imp(T *seq, int *n)
{
std::cout << " library function received " << std::hex <<
(long) seq << std::endl;

VectorOps<T> vo(*n);

vo << seq;
vo.inc();
vo.sqr();
vo >> seq;
}
extern "C"
{

void external_processing_i(int *n, int **pn)
{
external_processing_imp(*pn, n);
}

void external_processing_f(int *n, double **pf)
{
external_processing_imp(*pf, n);
}
}





Hi Barak,

pgc++ doesn’t support compiling CUDA C device code. Currently only nvcc is able to compile this code.

  • Mat

Hi Mat,
In fact, I want to use this code as a C++ module called from Fortran; see the code below.
Is there a way to compile the C++ code with nvcc and link it with pgfortran?

Barak

PROGRAM load_lib
!This program calls the external C++/CUDA processing routines
IMPLICIT NONE
INTEGER, PARAMETER :: LEN = 1000, UB = LEN-1
INTEGER seq(0:UB)
INTEGER n
INTEGER l
INTEGER*8 pn
POINTER (pn, l)

REAL*8, DIMENSION(0:UB) :: seqf
INTEGER*8 pf
REAL*8 f
POINTER (pf, f)

! real*8,DIMENSION(2) :: Az_min_max

print*
print*,'seq = '
do n = 0, UB, 1
seq(n) = n
seqf(n) = n
print*, 'seq[', n, '] = ', seq(n), ', seqf[', n, '] = ', seqf(n)
end do

print*, 'Calling external module...'
print*
print*, '---------- external module output start ----------'
print*

n = LEN
pn = loc(seq)
pf = loc(seqf)
!CALL external_processing_i(seq, n, pn)
CALL external_processing_f(n, pf)

print*
print*, '---------- external module output end ----------'
print*
500 print*,'Done'

print*,'seq = '
do n = 0, UB, 1
print*,seqf(n)
end do

print*

END PROGRAM load_lib

Sure. You just compile the CUDA file with nvcc, then link with pgfortran. Adding "-Mcuda" will link in the CUDA libraries.
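For example, something like the following should work (a sketch only; the file names load_lib.f90 and code.o are assumptions based on the code above, and depending on your installation you may also need to link in the C++ runtime libraries):

nvcc -c code.cu
pgfortran -Mcuda load_lib.f90 code.o -o load_lib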

The code as written is just Fortran calling C++, since you’re calling a host C++ routine. You can also call a CUDA C "__global__" device routine directly and use device data, since we support CUDA Fortran.

In either case, I would recommend you add an Interface block using F2003 ISO_C_BINDING.
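For instance, here is a minimal sketch of such an interface block for external_processing_f, assuming the routine keeps exactly the extern "C" name "external_processing_f" shown above:

INTERFACE
   SUBROUTINE external_processing_f(n, pf) BIND(C, name="external_processing_f")
      USE, INTRINSIC :: ISO_C_BINDING
      INTEGER(C_INT) :: n    ! corresponds to int *  (passed by reference)
      TYPE(C_PTR)    :: pf   ! corresponds to double ** (C_PTR passed by reference)
   END SUBROUTINE external_processing_f
END INTERFACE

With a binding like this, the caller would pass a TYPE(C_PTR) obtained from C_LOC of a TARGET array (for example seqf) instead of the Cray POINTER/loc() pair used in the program above; this is a sketch of the binding, not a drop-in change to your code.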

For reference:


PGI User’s Guide Chapter 13 Inter-language Calling: http://www.pgroup.com/doc/pgiug.pdf
  • Mat