#include
#include<stdio.h>
using namespace std;
#include<cuda.h>
#define INF 10000
void readinputs(int e, int from, int to, int start, int duration)
{
for(int i=0;i<e;i++)
{
cin>>from[i]>>to[i]>>start[i];
duration[i]=1;
}
}
global
void valuate(int n, int e, int *from, int *to, int *start, int
*duration, int src,int *lock, int *earliest, int *critical)
{
int i=threadIdx.x;
while(lock[from[i]]==0)
{
//do nothing
}
if(earliest[from[i]]<=start[i])
{
while(critical[to[i]]==1)
{
//do nothing
}
critical[to[i]]=1;
earliest[to[i]]=min(earliest[to[i]], start[i]+duration[i]);
lock[to[i]]=1;
critical[to[i]]=0;
}
}
int main()
{
int n;
cin>>n;
int e;
cin>>e;
int from[e], to[e], start[e], duration[e];
readinputs(e,from,to,start, duration);
int lock[n];
fill_n(lock, n, 0);
int earliest[n];
fill_n(earliest, n, INF);
cout<<endl;
int src;
cin>>src;
earliest[src]=0;
lock[src]=1;
int critical[n];
fill_n(critical, n, 0);
int cfrom, cto, cstart, cduration, cearliest;
int clock, ccritical;
cudaMalloc( (void)&cfrom, esizeof(int) );
cudaMalloc( (void)&cto, esizeof(int));
cudaMalloc( (void*)&cstart, esizeof(int) );
cudaMalloc( (void**)&cduration, esizeof(int ));
cudaMalloc( (void**)&cearliest, nsizeof(int) );
cudaMalloc( (void**)&clock, nsizeof(int) );
cudaMalloc( (void**)&ccritical, nsizeof(int));
cudaMemcpy( cfrom, from, esizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( ccritical, critical, nsizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy( cto, to, esizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( cstart, start, esizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( cduration, duration, esizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( cearliest, earliest, nsizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( clock, lock, nsizeof(int), cudaMemcpyHostToDevice );
// end of sending
cout<<“now running the kernel”<<endl;
valuate<<<1,e>>>(n,e,cfrom,cto,cstart, cduration, src, clock,
cearliest, ccritical);
cout<<“kernel ran”<<endl;
/*cudaFree(cfrom);
cudaFree(cto);
cudaFree(cstart);
cudaFree(cduration);
cudaFree(clock);
/cout<<“MEMORY FOR EARLIEST TIME ARRAY COPYING BACK TO CPU”<<endl;
cudaMemcpy(earliest, cearliest, nsizeof(int), cudaMemcpyDeviceToHost);
cudaFree(cearliest);
cout<<“Memory copied”<<endl;
for(int i=0;i<n;i++)
{
cout<<"Earliest time for “<<i<<” is: "<<earliest[i]<<endl;
}
return EXIT_SUCCESS;
}
The code is for shortest arrival time in temporal graph. I have written a multithreaded version of the same code and there it gives the correct outputs. Here it stops after printing “MEMORY FOR EARLIEST TIME ARRAY COPYING BACK TO CPU” when giving the same inputs. What could be possibly wrong here?