Hi
My code has different outputs compared with the sequential execution.
Am I doing something wrong?
My code:
#include <iostream>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
using namespace std;
int main() {
struct timeval inicio, fim;
int tmili;
double * hidden_bias;
double * output_bias;
double ** input_weights;
double ** output_weights;
double ** computation;
double ** inputs_lote;
double ** outputs_lote;
unsigned int qtd_entradas_saidas = 5;
unsigned int entradas=30;
unsigned int saidas=5;
unsigned int altura=1000;
hidden_bias = (double *)malloc(sizeof(double)*altura);
output_bias = (double *)malloc(sizeof(double)*saidas);
input_weights = (double **)malloc(sizeof(double)*altura);
for(unsigned int i=0; i < altura; i++){
input_weights[i] = (double *)malloc(sizeof(double)*entradas);
}
output_weights = (double **)malloc(sizeof(double)*saidas);
for(unsigned int i=0; i < saidas; i++){
output_weights[i] = (double *)malloc(sizeof(double)*altura);
}
computation = (double **)malloc(sizeof(double)*qtd_entradas_saidas);
for(unsigned int i=0; i < qtd_entradas_saidas; i++){
computation[i] = (double *)malloc(sizeof(double)*altura);
}
inputs_lote = (double **)malloc(sizeof(double)*qtd_entradas_saidas);
for(unsigned int i=0; i < qtd_entradas_saidas; i++){
inputs_lote[i] = (double *)malloc(sizeof(double)*entradas);
}
outputs_lote = (double **)malloc(sizeof(double)*qtd_entradas_saidas);
for(unsigned int i=0; i < qtd_entradas_saidas; i++){
outputs_lote[i] = (double *)malloc(sizeof(double)*saidas);
}
int semente = 10;
//Inicializar a semente para rand
srand(semente);
//passar por parametro vetor gerado aleatoreamente
//cout << "Gerando pesos";
double pesos[entradas * altura + saidas * altura + altura + saidas];
for(unsigned int i=0; i<(entradas * altura + saidas * altura + altura + saidas); i++){
double num = (rand()/(double)RAND_MAX); //TODO: conferir
if(rand()%2==0){
num = num * -1;
}
pesos[i] = num;
//cout << pesos[i] << ", ";
}
//passar por parametro vetor gerado aleatoreamente
//cout << "\nEntradas: ";
for(unsigned int i = 0; i<qtd_entradas_saidas; i++){
for(unsigned int j = 0; j<entradas; j++){
double num = (rand()/(double)RAND_MAX); //TODO: conferir
if(rand()%2==0){
num = num * -1;
}
inputs_lote[i][j] = num;
//cout << "Entrada[" << i << "][" << j << "] = " << inputs_lote[i][j] << "\n";
}
//cout << "\n";
}
//Definindo Pesos
unsigned int v = 0;
for(unsigned int h = 0; h < altura; h++)
{
hidden_bias[h] = pesos[v++];
}
for(unsigned int o = 0; o < saidas; o++)
{
output_bias[o] = pesos[v++];
}
for(unsigned int h = 0; h < altura; h++)
{
for(unsigned int i = 0; i < entradas; i++)
{
input_weights[h][i] = pesos[v++];
}
}
for(unsigned int o = 0; o < saidas; o++)
{
for(unsigned int w = 0; w < altura; w++)
{
output_weights[o][w] = pesos[v++];
}
}
//cout << "\nExecutando a rede";
gettimeofday(&inicio, NULL);
#pragma acc data copyin(hidden_bias[0:altura],inputs_lote[0:qtd_entradas_saidas][0:entradas],input_weights[0:altura][0:entradas],output_bias[0:saidas],output_weights[0:saidas][0:altura]), create(computation[0:qtd_entradas_saidas][0:altura]), copyout(outputs_lote[0:qtd_entradas_saidas][0:saidas])
#pragma acc parallel
{
#pragma acc loop gang(512)
for(unsigned int p=0; p<qtd_entradas_saidas; p++){
#pragma acc loop worker(512)
for(unsigned int height = 0; height < altura; height++)
{
double sinapse = hidden_bias[height];
#pragma acc loop seq
for(unsigned int weight = 0; weight < entradas; weight++)
{
sinapse += inputs_lote[p][weight] * input_weights[height][weight];
}
computation[p][height] = sinapse > 0 ? 1.0 : 0.0;
}
#pragma acc loop worker(5)
for(unsigned int height = 0; height < saidas; height++)
{
double sinapse = output_bias[height];
#pragma acc loop seq
for(unsigned int weight = 0; weight < altura; weight++)
{
sinapse += computation[p][weight] * output_weights[height][weight];
}
outputs_lote[p][height] = sinapse > 0 ? 1.0 : 0.0;
}
}
}
gettimeofday(&fim, NULL);
cout << "\nOutputs: ";
for(unsigned int i=0; i<qtd_entradas_saidas; i++){
for(unsigned int j = 0; j<saidas; j++){
cout << "Saida[" << i << "][" << j << "] = " << outputs_lote[i][j] << "\n";
}
cout << "\n";
}
tmili = (int) (1000 * (fim.tv_sec - inicio.tv_sec) + (fim.tv_usec - inicio.tv_usec) / 1000);
cout << "\nTempo decorrido: E: " << entradas << ", A: " << altura << ", S: " << saidas << ", T: " << tmili;
return 0;
}