Hi everybody External Media ,
I encounter a big problem with the update of new values in a very very basic API.
Indeed, I try to simulate a basc circuit with a not operator. I realize that with a class “signal” which have two values:
one for the current cycle (t0)
another for the nex cycle (t1)
Between each cycle, the current state of the signal is updated owing to the function maj( ). It just do t0=t1
This precisely where I have my problem: the change seems to be ignored! => t0 gives its value! External Media
This is the code:
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
class signal
{
bool _t0,_t1;
public:
__device__
signal()
{
}
__device__
void init(bool valeur)
{
_t0=valeur;
_t1=valeur;
}
__device__
bool t1()
{
return _t1;
}
__device__
bool t0()
{
return _t0;
}
__device__
void w(bool val)
{
_t1=val;
}
__device__
void maj()
{
_t0=_t1;
threadfence();
};
};
__global__
void kernel(int n_cycles,int t_cycle,int* retour)
{
signal s;
s.init(true);
int i;
for(i=0;i<n_cycles;i++)
{
retour[t_cycle*i]=i;
retour[t_cycle*i+1]=s.t0();
retour[t_cycle*i+2]=s.t1();
retour[t_cycle*i+3]=~s.t0();
s.w(~s.t0());
threadfence();
retour[t_cycle*i+4]=s.t1();
s.maj();
threadfence();
}
}
//============================================================
host
int main()
{
int t_num_cycle =1;
int t_cycle =4+t_num_cycle;
int n_cycles =100000/t_cycle;
int t_buffer =t_cycle*n_cycles;
int t_bytes_buffer =t_buffer*sizeof(int);
FILE* pFile;
pFile = fopen ("simu2.trace","w");
int retour[t_buffer];
int* retour_device;
cudaFuncSetCacheConfig(kernel, cudaFuncCachePreferL1);
CUevent start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaMalloc ( (void**) &retour_device, t_bytes_buffer);
cudaEventRecord(start, 0);
kernel<<<1,100>>>(n_cycles,t_cycle,retour_device);
cudaThreadSynchronize();
cudaMemcpy(retour,retour_device,t_bytes_buffer,cudaMemcpyDeviceToHo
st);
cudaEventRecord(stop, 0);
int j;
for(j=0;j<n_cycles;j++)
{
fprintf (pFile, "cycle %5i t0:%i t1:%i ~t0:%i t1=~t0:%i \n",retour[j*t_cycle],retour[j*t_cycle+1],retour[j*t_cycle+2],retour[j*t_cycle+3],retour[j*t_cycle+4]);
}
fclose (pFile);
cudaEventSynchronize(stop);
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start, stop);
printf("===> %f ms",elapsedTime);
printf("\nnombre de cycles: %i \ntaille d'un cycle: %i \ntaille du buffer en int: %i \ntaille du buffer en octets: %i \n" ,n_cycles,t_cycle,t_buffer,t_bytes_buffer);
return 0;
}
and the trace:
So I do not understand anything: do you any idea please? :)