I have just written a segment of code using Thrust. However, I cannot
profile the code using the Visual Profiler 5.0; it fails with the following message:
"Unable to read the entire session timeline. The displayed timeline
may be empty or incomplete because the application aborted or because
the execution timeout was reached before all timeline data was written.
"
The full code is:
[i]#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <iostream>
using namespace std;
#define N 40000000
struct innerProduct
{
template
host device
void operator()(Tuple t)
{
thrust::get<6>(t) = thrust::get<0>(t)*thrust::get<3>(t) +
thrust::get<1>(t)*thrust::get<4>(t)
+thrust::get<2>(t)*thrust::get<5>(t) ;
}
};
struct crossProduct
{
template
host device
void operator()(Tuple t)
{
thrust::get<6>(t) = thrust::get<1>(t)*thrust::get<5>(t) -
thrust::get<2>(t)*thrust::get<4>(t);
thrust::get<7>(t) = thrust::get<2>(t)*thrust::get<3>(t) -
thrust::get<0>(t)*thrust::get<5>(t);
thrust::get<8>(t) = thrust::get<0>(t)*thrust::get<4>(t) -
thrust::get<1>(t)*thrust::get<3>(t);
}
};
int main(void)
{
thrust::device_vector d_ax(N);
thrust::device_vector d_ay(N);
thrust::device_vector d_az(N);
thrust::fill(d_ax.begin(),d_ax.end(), 1.f);
thrust::fill(d_ay.begin(),d_ay.end(),2.f);
thrust::fill(d_az.begin(),d_az.end(), 3.f);
thrust::device_vector d_bx(N);
thrust::device_vector d_by(N);
thrust::device_vector d_bz(N);
thrust::fill(d_bx.begin(),d_bx.end(), 2.f);
thrust::fill(d_by.begin(),d_by.end(),2.f);
thrust::fill(d_bz.begin(),d_bz.end(), 3.f);
thrust::device_vector d_c(N);
thrust::device_vector d_dx(N);
thrust::device_vector d_dy(N);
thrust::device_vector d_dz(N);
thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple
(
d_ax.begin(),
d_ay.begin(), d_az.begin(),
d_bx.begin(),
d_by.begin(), d_bz.begin(),
d_c.begin()
)
),
thrust::make_zip_iterator(thrust::make_tuple
(
d_ax.end(),
d_ay.end(), d_az.end(),
d_bx.end(),
d_by.end(), d_bz.end(),
d_c.end()
)
),
innerProduct());
thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple
(
d_ax.begin(),
d_ay.begin(), d_az.begin(),
d_bx.begin(),
d_by.begin(), d_bz.begin(),
d_dx.begin(),
d_dy.begin(), d_dz.begin()
)
),
thrust::make_zip_iterator(thrust::make_tuple
(
d_ax.end(),
d_ay.end(), d_az.end(),
d_bx.end(),
d_by.end(), d_bz.end(),
d_dx.end(),
d_dy.end(), d_dz.end()
)
),
crossProduct());
thrust::host_vector c = d_c;
thrust::host_vector h_dx = d_dx;
thrust::host_vector h_dy= d_dy;
thrust::host_vector h_dz = d_dz;
thrust::host_vector d(20);
thrust::copy(d_c.begin(), d_c.begin()+10, d.begin());
thrust::copy(d_dx.begin(), d_dx.begin()+10, d.begin()+10);
cout<<"size of c is "<<c.size()<<endl;
for(int i=0; i<20; i++)
{
cout<<d[i]<<endl;
}
cout<<"size of h_dx is "<<h_dx.size()<<endl;
for(int i=0 ; i<10; i++)
{
cout<<h_dx[i]<<" “<<h_dy[i]<<” "<<h_dz[i]<<endl;
}
return 0;
}[/i]
I compile it with the following command:
nvcc --profile -arch=sm_20 thrustTest.cu -o thrustTest
and use the Visual Profiler to profile thrustTest.
My platform:
GPU: GTX 680
CUDA version: 5.0
OS: Windows 7, 64-bit
Is there any problem?
Would you please give me some suggestions?
Regards, Junwei