I am trying to build WRFV3 with PGI accelerator support. I have tried PGI versions 11.10, 12.9, and 12.10, with both MPICH and OpenMPI, but I always get the errors below:
( cd frame ; make -i -r -j 2 framework; \
cd ../external/io_netcdf ; \
make -i -r NETCDFPATH="/usr/local" FC="pgf90 -w -Mfree -byteswapio " RANLIB="ranlib" \
CPP="/lib/cpp -C -P" LDFLAGS=" -fastsse -Mvect=noaltcode -Msmartalloc -Mprefetch=distance:8 -ta=nvidia:fastmath,host -w -Mfree -byte
<------> LIB_LOCAL="" \
ESMF_MOD_DEPENDENCE="/test/test/WRFV3/external/esmf_time_f90/module_utility.o" AR="INTERNAL_BUILD_ERROR_SHOULD_NOT_NEED_AR" diffwrf; \
cd ../io_int ; \
make -i -r SFC="pgf90 -w -Mfree -byteswapio " FC="pgf90 -w -Mfree -byteswapio " RANLIB="ranlib" CPP="/lib/cpp -C -P" \
TRADFLAG="-traditional" ESMF_IO_LIB_EXT="-L/test/test/WRFV3/external/esmf_time_f90 -lesmf_time" \
ESMF_MOD_DEPENDENCE="/test/test/WRFV3/external/esmf_time_f90/module_utility.o" AR="INTERNAL_BUILD_ERROR_SHOULD_NOT_NEED_AR" diffwrf ;
cd ../../frame )
if [ -n "" ] ; then echo COMPILING module_mp_wsm5.F WITHOUT OMP ; fi ; \
<------> mpif90 -f90=pgf90 -o module_mp_wsm5.o -c -fastsse -Mvect=noaltcode -Msmartalloc -Mprefetch=distance:8 -ta=nvidia:fastmath,host -w -Mfree
fi
NOTE: your trial license will expire in 7 days, 5.02 hours.
NOTE: your trial license will expire in 7 days, 5.01 hours.
### Assertion failure at line 1923 of ../../be/cg/cgemit.cxx:
### Compiler Error in file /tmp/pgnvdcIBegDUCPOFM.i during Assembly phase:
### incorrect register class for result 0
### Assertion failure at line 1923 of ../../be/cg/cgemit.cxx:
### Compiler Error in file /tmp/pgnvdb0BedIJ-Y7hC.i during Assembly phase:
### incorrect register class for result 0
0 inform, 2 warnings, 0 severes, 0 fatal for wsm52d
rm -f module_mp_wsm6.o
Below is my system information:
$ uname -a
Linux 3.2.0-32-generic #51-Ubuntu SMP Wed Sep 26 21:33:09 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
$ gcc -v
gcc version 4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5)
$ pgaccelinfo
CUDA Driver Version: 5000
NVRM version: NVIDIA UNIX x86_64 Kernel Module 304.54 Sat Sep 29 00:05:49 PDT 2012
Device Number: 0
Device Name: GeForce GTX 650
Device Revision Number: 3.0
Global Memory Size: 2146762752
Number of Multiprocessors: 2
Number of SP Cores: 384
Number of DP Cores: 128
Concurrent Copy and Execution: Yes
Total Constant Memory: 65536
Total Shared Memory per Block: 49152
Registers per Block: 65536
Warp Size: 32
Maximum Threads per Block: 1024
Maximum Block Dimensions: 1024, 1024, 64
Maximum Grid Dimensions: 2147483647 x 65535 x 65535
Maximum Memory Pitch: 2147483647B
Texture Alignment: 512B
Clock Rate: 1110 MHz
Execution Timeout: No
Integrated Device: No
Can Map Host Memory: Yes
Compute Mode: default
Concurrent Kernels: Yes
ECC Enabled: No
Memory Clock Rate: 2500 MHz
Memory Bus Width: 128 bits
L2 Cache Size: 262144 bytes
Max Threads Per SMP: 2048
Async Engines: 1
Unified Addressing: Yes
Initialization time: 509196 microseconds
Current free memory: 2123882496
Upload time (4MB): 823 microseconds ( 645 ms pinned)
Download time: 945 microseconds ( 637 ms pinned)
Upload bandwidth: 5096 MB/sec (6502 MB/sec pinned)
Download bandwidth: 4438 MB/sec (6584 MB/sec pinned)
Here are the relevant settings from my configure.wrf:
# Settings for Linux x86_64, PGI accelerator compiler with gcc (dmpar)
#
DMPARALLEL = 1
OMPCPP = # -D_OPENMP
OMP = # -mp -Minfo=mp -Mrecursive
OMPCC = # -mp
SFC = pgf90
SCC = gcc
CCOMP = pgcc
DM_FC = mpif90 -f90=$(SFC)
DM_CC = mpicc -DMPI2_SUPPORT
FC = $(DM_FC)
CC = $(DM_CC) -DFSEEKO64_OK
LD = $(FC)
RWORDSIZE = $(NATIVE_RWORDSIZE)
PROMOTION = -r$(RWORDSIZE) -i4
ARCH_LOCAL = -DNONSTANDARD_SYSTEM_SUBR -D_ACCEL
CFLAGS_LOCAL = -w -O3
LDFLAGS_LOCAL =
CPLUSPLUSLIB =
ESMF_LDFLAG = $(CPLUSPLUSLIB)
FCOPTIM = -fastsse -Mvect=noaltcode -Msmartalloc -Mprefetch=distance:8 -ta=nvidia:fastmath,host # -Minfo=all =Mneginfo=all
FCREDUCEDOPT = $(FCOPTIM)
FCNOOPT = -O0
FCDEBUG = # -g $(FCNOOPT)
FORMAT_FIXED = -Mfixed
FORMAT_FREE = -Mfree
FCSUFFIX =
BYTESWAPIO = -byteswapio
FCBASEOPTS = -w $(FCDEBUG) $(FORMAT_FREE) $(BYTESWAPIO) $(OMP)
MODULE_SRCH_FLAG = -module $(WRF_SRC_ROOT_DIR)/main
TRADFLAG = -traditional
CPP = /lib/cpp -C -P
AR = ar
ARFLAGS = ru
M4 = m4 -B 14000
RANLIB = ranlib
CC_TOOLS = $(SCC)
Could you please advise? Thanks in advance.
Aro