Why does my code stuck and doesn't go any further?

#include
#include<stdio.h>
using namespace std;
#include<cuda.h>
#define INF 10000

void readinputs(int e, int from, int to, int start, int duration)
{
for(int i=0;i<e;i++)
{
cin>>from[i]>>to[i]>>start[i];
duration[i]=1;
}

}

global
void valuate(int n, int e, int *from, int *to, int *start, int
*duration, int src,int *lock, int *earliest, int *critical)
{
int i=threadIdx.x;
while(lock[from[i]]==0)
{
//do nothing
}
if(earliest[from[i]]<=start[i])
{
while(critical[to[i]]==1)
{
//do nothing
}
critical[to[i]]=1;
earliest[to[i]]=min(earliest[to[i]], start[i]+duration[i]);

lock[to[i]]=1;
critical[to[i]]=0;
}
}

int main()
{
int n;
cin>>n;
int e;
cin>>e;
int from[e], to[e], start[e], duration[e];
readinputs(e,from,to,start, duration);
int lock[n];
fill_n(lock, n, 0);
int earliest[n];
fill_n(earliest, n, INF);
cout<<endl;

int src;
cin>>src;
earliest[src]=0;
lock[src]=1;
int critical[n];
fill_n(critical, n, 0);

int cfrom, cto, cstart, cduration, cearliest;
int clock, ccritical;
cudaMalloc( (void
)&cfrom, e
sizeof(int) );
cudaMalloc( (void
)&cto, e
sizeof(int));
cudaMalloc( (void
*)&cstart, esizeof(int) );
cudaMalloc( (void**)&cduration, e
sizeof(int ));
cudaMalloc( (void**)&cearliest, nsizeof(int) );
cudaMalloc( (void**)&clock, n
sizeof(int) );
cudaMalloc( (void**)&ccritical, nsizeof(int));
cudaMemcpy( cfrom, from, e
sizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( ccritical, critical, nsizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy( cto, to, e
sizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( cstart, start, esizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( cduration, duration, e
sizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( cearliest, earliest, nsizeof(int), cudaMemcpyHostToDevice );
cudaMemcpy( clock, lock, n
sizeof(int), cudaMemcpyHostToDevice );

// end of sending
cout<<“now running the kernel”<<endl;
valuate<<<1,e>>>(n,e,cfrom,cto,cstart, cduration, src, clock,
cearliest, ccritical);
cout<<“kernel ran”<<endl;
/*cudaFree(cfrom);
cudaFree(cto);
cudaFree(cstart);
cudaFree(cduration);
cudaFree(clock);
/cout<<“MEMORY FOR EARLIEST TIME ARRAY COPYING BACK TO CPU”<<endl;
cudaMemcpy(earliest, cearliest, n
sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(cearliest);
cout<<“Memory copied”<<endl;
for(int i=0;i<n;i++)
{
cout<<"Earliest time for “<<i<<” is: "<<earliest[i]<<endl;
}
return EXIT_SUCCESS;
}

The code is for shortest arrival time in temporal graph. I have written a multithreaded version of the same code and there it gives the correct outputs. Here it stops after printing “MEMORY FOR EARLIEST TIME ARRAY COPYING BACK TO CPU” when giving the same inputs. What could be possibly wrong here?

I would suggest adding status checks to all CUDA API calls, and to check the status of each kernel launch.

Use Google to search for: CUDA error checking