Error when calling Thrust from Fortran using PVF 14.6

Hello! I have successfully called Thrust from Fortran in PVF 13.9. The Fortran code is as follows:

testsort.cuf

program testsort
    ! Demo driver: fills four host arrays with random data, mirrors them on
    ! the GPU, sorts them through the generic `thrustsort` interface (two
    ! plain sorts and two key/index sorts), times the four sorts with CUDA
    ! events and prints the first and last five entries of every result.
    use cudafor
    use thrust
    implicit none

    ! Number of elements in every array.
    integer, parameter :: N = 1000000

    ! Host arrays; the matching device copies carry the _d suffix.
    real,    allocatable         :: arr1(:), arr3(:)
    integer, allocatable         :: arr2(:), arr4(:), idx3(:), idx4(:)
    real,    allocatable, device :: arr1_d(:), arr3_d(:)
    integer, allocatable, device :: arr2_d(:), arr4_d(:), idx3_d(:), idx4_d(:)

    ! CUDA events bracketing the sort calls, plus scratch scalars.
    type(cudaEvent) :: evStart, evStop
    real            :: elapsedMs, rnd
    integer         :: k, ierr

    ! Set up the timing events.
    ierr = cudaEventCreate(evStart)
    ierr = cudaEventCreate(evStop)

    ! Host storage first, then the device mirrors.
    allocate(arr1(N), arr2(N), arr3(N), arr4(N), idx3(N), idx4(N))
    allocate(arr1_d(N), arr2_d(N), arr3_d(N), arr4_d(N), idx3_d(N), idx4_d(N))

    call random_seed()

    ! Random payload: reals stay as drawn, integers are the draw scaled by
    ! 100 and converted on assignment. One draw per element, in the order
    ! arr1, arr2, arr3, arr4.
    do k = 1, N
        call random_number(rnd)
        arr1(k) = rnd
        call random_number(rnd)
        arr2(k) = rnd*100
        call random_number(rnd)
        arr3(k) = rnd
        call random_number(rnd)
        arr4(k) = rnd*100
    end do

    ! Index arrays start out as 1..N so the key sorts reveal the permutation.
    do k = 1, N
        idx3(k) = k
        idx4(k) = k
    end do

    print *, "Sorting array of ",N,"Single precision"
    print *, ""

    ! Host -> device transfers (assignment to a device array copies the data).
    arr1_d = arr1
    arr2_d = arr2
    arr3_d = arr3
    arr4_d = arr4
    idx3_d = idx3
    idx4_d = idx4

    ! Timed region: only the four sort calls sit between the two events.
    ierr = cudaEventRecord(evStart, 0)
    call thrustsort(arr1_d, N)
    call thrustsort(arr2_d, N)
    call thrustsort(arr3_d, idx3_d, N)
    call thrustsort(arr4_d, idx4_d, N)
    ierr = cudaEventRecord(evStop, 0)
    ierr = cudaEventSynchronize(evStop)
    ierr = cudaEventElapsedTime(elapsedMs, evStart, evStop)

    ! Device -> host transfers of the sorted data.
    arr1 = arr1_d
    arr2 = arr2_d
    arr3 = arr3_d
    arr4 = arr4_d
    idx3 = idx3_d
    idx4 = idx4_d

    ! Report the elapsed time of the timed region.
    print *, "Sorted array in:",elapsedMs," (ms)"

    ! Show the five smallest and five largest positions of each array.
    print *,"After sorting arr1",arr1(1:5),arr1(N-4:N)
    print *,"After sorting arr2",arr2(1:5),arr2(N-4:N)
    print *,"After sorting arr3",arr3(1:5),arr3(N-4:N)
    print *,"After sorting arr4",arr4(1:5),arr4(N-4:N)
    print *,"After sorting idx3",idx3(1:5),idx3(N-4:N)
    print *,"After sorting idx4",idx4(1:5),idx4(N-4:N)

    ! Release host and device storage, then the events.
    deallocate(arr1, arr2, arr3, arr4)
    deallocate(arr1_d, arr2_d, arr3_d, arr4_d)
    deallocate(idx3, idx4, idx3_d, idx4_d)
    ierr = cudaEventDestroy(evStart)
    ierr = cudaEventDestroy(evStop)

    ! Wait for the user before the program ends.
    pause
    stop
end program testsort

thrust_module.cuf

module thrust
    ! Generic name `thrustsort` over four C-bound routines. The specific
    ! routine is selected from the argument list:
    !   (integer device array, N)                      -> sort_int_wrapper
    !   (real device array,    N)                      -> sort_float_wrapper
    !   (integer device array, integer device idx, N)  -> sort_by_key_int_int_wrapper
    !   (real device array,    integer device idx, N)  -> sort_by_key_float_int_wrapper
    ! In every case N is passed by value as a C int.
    interface thrustsort

        ! Integer array, bound to C symbol sort_int_wrapper.
        subroutine sort_int(input, N) bind(C, name="sort_int_wrapper")
            use iso_c_binding, only: c_int
            integer(c_int), device, dimension(*) :: input
            integer(c_int), value                :: N
        end subroutine sort_int

        ! Single-precision array, bound to C symbol sort_float_wrapper.
        subroutine sort_float(input, N) bind(C, name="sort_float_wrapper")
            use iso_c_binding, only: c_int, c_float
            real(c_float),  device, dimension(*) :: input
            integer(c_int), value                :: N
        end subroutine sort_float

        ! Integer array plus integer index array, bound to C symbol
        ! sort_by_key_int_int_wrapper.
        subroutine sort_by_key_int_int(input, idx, N) &
            bind(C, name="sort_by_key_int_int_wrapper")
            use iso_c_binding, only: c_int
            integer(c_int), device, dimension(*) :: input
            integer(c_int), device, dimension(*) :: idx
            integer(c_int), value                :: N
        end subroutine sort_by_key_int_int

        ! Single-precision array plus integer index array, bound to C symbol
        ! sort_by_key_float_int_wrapper.
        subroutine sort_by_key_float_int(input, idx, N) &
            bind(C, name="sort_by_key_float_int_wrapper")
            use iso_c_binding, only: c_int, c_float
            real(c_float),  device, dimension(*) :: input
            integer(c_int), device, dimension(*) :: idx
            integer(c_int), value                :: N
        end subroutine sort_by_key_float_int

    end interface thrustsort
end module thrust

csort.cu

#include <thrust/device_vector.h>
#include <thrust/sort.h>
extern "C" {
    // C-linkage entry points around Thrust sorting so the routines can be
    // called from non-C++ code. Each wrapper receives a raw pointer, wraps it
    // in a thrust::device_ptr (so Thrust treats it as device memory) and
    // sorts the first N elements in place with Thrust's default ordering.
    //
    // N arrives unchecked from the caller. A non-positive N would form an
    // empty or inverted range [ptr, ptr + N); the inverted case is undefined
    // behaviour, so every wrapper returns early when there is nothing to sort.

    // Sort N ints starting at `data`.
    void sort_int_wrapper(int *data, int N)
    {
        if (N <= 0) return;  // nothing to sort / reject inverted range

        // Wrap raw pointer with a device_ptr and hand the range to Thrust.
        thrust::device_ptr<int> dev_ptr(data);
        thrust::sort(dev_ptr, dev_ptr + N);
    }

    // Sort N single-precision floats starting at `data`.
    void sort_float_wrapper(float *data, int N)
    {
        if (N <= 0) return;  // nothing to sort / reject inverted range

        thrust::device_ptr<float> dev_ptr(data);
        thrust::sort(dev_ptr, dev_ptr + N);
    }

    // Sort N int keys starting at `data`; the N ints starting at `idx` are
    // the associated values and are reordered together with their keys.
    void sort_by_key_int_int_wrapper(int *data, int *idx, int N)
    {
        if (N <= 0) return;  // nothing to sort / reject inverted range

        thrust::device_ptr<int> dev_ptr(data);
        thrust::device_ptr<int> dev_ptr_idx(idx);
        thrust::sort_by_key(dev_ptr, dev_ptr + N, dev_ptr_idx);
    }

    // Sort N float keys starting at `data`; the N ints starting at `idx` are
    // the associated values and are reordered together with their keys.
    void sort_by_key_float_int_wrapper(float *data, int *idx, int N)
    {
        if (N <= 0) return;  // nothing to sort / reject inverted range

        thrust::device_ptr<float> dev_ptr(data);
        thrust::device_ptr<int> dev_ptr_idx(idx);
        thrust::sort_by_key(dev_ptr, dev_ptr + N, dev_ptr_idx);
    }
}

csort.cu is compiled with the VS2012 toolchain using the command:
nvcc -c -arch sm_10 csort.cu

But when I switch to PVF 14.6, the following error occurs:
Deleting intermediate and output files for project ‘CudaFortranCallThrustSortByKey’, configuration ‘Debug’
Compiling Project …
testsort.cuf
PGF90/x86 Windows 14.6-0: compilation aborted
Module not found while generating build dependencies; will retry testsort.cuf
thrust_module.cuf
testsort.cuf
Linking…
libcpmt.lib(xthrow.obj) : error LNK2038: mismatch detected for ‘_MSC_VER’: value ‘1800’ doesn’t match value ‘1700’ in csort.obj
libcpmt.lib(syserror.obj) : error LNK2038: mismatch detected for ‘_MSC_VER’: value ‘1800’ doesn’t match value ‘1700’ in csort.obj
D:\PGI Visual Fortran 14.6\CudaFortranCallThrustSortByKey\CudaFortranCallThrustSortByKey\Win32\Debug\CudaFortranCallThrustSortByKey.exe : fatal error LNK1319: 2 mismatches detected
D:\PGI Visual Fortran 14.6\CudaFortranCallThrustSortByKey\CudaFortranCallThrustSortByKey\Win32\Debug/CudaFortranCallThrustSortByKey.exf: error STP001: cannot open file
CudaFortranCallThrustSortByKey build failed.

How can I solve this problem? Thank you very much!

Nightwish

Hi Nightwish,

We don’t target CC1.0 by default any longer. You can still target it, but you need to explicitly set -Mcuda=cc10 or -Mcuda=tesla. In the PVF property pages, you’d select “CUDA Fortran tesla” under Fortran->language.

Though, do you really want to target 1.0 devices?

  • Mat

Yes, because my device is a GeForce 8400M GS card with CC 1.1.

I have tried all the combinations from cc10 to cc35, and the same error occurs.

However, looking at the errors:

libcpmt.lib(xthrow.obj) : error LNK2038: mismatch detected for ‘_MSC_VER’: value ‘1800’ doesn’t match value ‘1700’ in csort.obj
libcpmt.lib(syserror.obj) : error LNK2038: mismatch detected for ‘_MSC_VER’: value ‘1800’ doesn’t match value ‘1700’ in csort.obj

I found that ‘1800’ corresponds to Visual Studio 2013 and ‘1700’ corresponds to Visual Studio 2012. csort.obj was compiled with VS2012 and the CUDA 5.5 nvcc (which seems to correspond to ‘1700’). I supposed that I should instead compile it with VS2013 and the CUDA 6.0 nvcc (which I assumed would correspond to ‘1800’), so I installed VS2013 and CUDA 6.0. But csort.obj compiled with the CUDA 6.0 nvcc still does not work; the same error occurs.

Therefore I uninstalled PVF 14.6 and installed PVF 13.9. PVF 13.9 can call Thrust successfully and seems more stable and robust.

Thank you very much!

Nightwish

Hi Nightwish,

From your description, this doesn’t sound like an issue with PGI’s tools but rather with the MSC version the CUDA tools use. Also, CUDA 6.0 has deprecated support for CC 1.0 targets, so you’ll most likely want to use an older tool chain in order to target your device.

  • Mat