That compiler is an interesting thing, thanks a lot for the info!
I will post some code so you can understand better:
void accept_connections(unsigned int num_conns) {
int fds[MAX_THREADS];
int conn_indexes[MAX_THREADS];
int conn_idx=0;
int new_bottom;
socklen_t slenghts[MAX_THREADS];
void *labels1[MAX_THREADS] = {&&a0,&&a1,&&a2,&&a3,&&a4,&&a5,&&a6,&&a7,&&a8,&&a9,&&a10,&&a11,&&a12,&&a13,&&a14,&&a15};
void *labels2[MAX_THREADS] = {&&b0,&&b1,&&b2,&&b3,&&b4,&&b5,&&b6,&&b7,&&b8,&&b9,&&b10,&&b11,&&b12,&&b13,&&b14,&&b15};
void *labels3[MAX_THREADS] = {&&c0,&&c1,&&c2,&&c3,&&c4,&&c5,&&c6,&&c7,&&c8,&&c9,&&c10,&&c11,&&c12,&&c13,&&c14,&&c15};
new_bottom=fl_conn_indexes_top-num_conns;
if (new_bottom<=0) return;
goto *labels1[num_conns];
a15:
conn_indexes[MAX_THREADS- 1]=fl_conn_indexes[fl_conn_indexes_top- 1];
a14:
conn_indexes[MAX_THREADS- 2]=fl_conn_indexes[fl_conn_indexes_top- 2];
a13:
conn_indexes[MAX_THREADS- 3]=fl_conn_indexes[fl_conn_indexes_top- 3];
a12:
conn_indexes[MAX_THREADS- 4]=fl_conn_indexes[fl_conn_indexes_top- 4];
a11:
conn_indexes[MAX_THREADS- 5]=fl_conn_indexes[fl_conn_indexes_top- 5];
a10:
conn_indexes[MAX_THREADS- 6]=fl_conn_indexes[fl_conn_indexes_top- 6];
a9:
conn_indexes[MAX_THREADS- 7]=fl_conn_indexes[fl_conn_indexes_top- 7];
a8:
conn_indexes[MAX_THREADS- 8]=fl_conn_indexes[fl_conn_indexes_top- 8];
a7:
conn_indexes[MAX_THREADS- 9]=fl_conn_indexes[fl_conn_indexes_top- 9];
a6:
conn_indexes[MAX_THREADS-10]=fl_conn_indexes[fl_conn_indexes_top-10];
a5:
conn_indexes[MAX_THREADS-11]=fl_conn_indexes[fl_conn_indexes_top-11];
a4:
conn_indexes[MAX_THREADS-12]=fl_conn_indexes[fl_conn_indexes_top-12];
a3:
conn_indexes[MAX_THREADS-13]=fl_conn_indexes[fl_conn_indexes_top-13];
a2:
conn_indexes[MAX_THREADS-14]=fl_conn_indexes[fl_conn_indexes_top-14];
a1:
conn_indexes[MAX_THREADS-15]=fl_conn_indexes[fl_conn_indexes_top-15];
a0:
conn_indexes[MAX_THREADS-16]=fl_conn_indexes[fl_conn_indexes_top-16];
fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
goto *labels2[num_conns];
b15:
conn_fds[conn_indexes[MAX_THREADS- 1]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 1]],&slenghts[MAX_THREADS- 1]);
b14:
conn_fds[conn_indexes[MAX_THREADS- 2]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 2]],&slenghts[MAX_THREADS- 2]);
b13:
conn_fds[conn_indexes[MAX_THREADS- 3]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 3]],&slenghts[MAX_THREADS- 3]);
b12:
conn_fds[conn_indexes[MAX_THREADS- 4]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 4]],&slenghts[MAX_THREADS- 4]);
b11:
conn_fds[conn_indexes[MAX_THREADS- 5]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 5]],&slenghts[MAX_THREADS- 5]);
b10:
conn_fds[conn_indexes[MAX_THREADS- 6]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 6]],&slenghts[MAX_THREADS- 6]);
b9:
conn_fds[conn_indexes[MAX_THREADS- 7]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 7]],&slenghts[MAX_THREADS- 7]);
b8:
conn_fds[conn_indexes[MAX_THREADS- 8]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 8]],&slenghts[MAX_THREADS- 8]);
b7:
conn_fds[conn_indexes[MAX_THREADS- 9]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 9]],&slenghts[MAX_THREADS- 9]);
b6:
conn_fds[conn_indexes[MAX_THREADS-10]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-10]],&slenghts[MAX_THREADS-10]);
b5:
conn_fds[conn_indexes[MAX_THREADS-11]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-11]],&slenghts[MAX_THREADS-11]);
b4:
conn_fds[conn_indexes[MAX_THREADS-12]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-12]],&slenghts[MAX_THREADS-12]);
b3:
conn_fds[conn_indexes[MAX_THREADS-13]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-13]],&slenghts[MAX_THREADS-13]);
b2:
conn_fds[conn_indexes[MAX_THREADS-14]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-14]],&slenghts[MAX_THREADS-14]);
b1:
conn_fds[conn_indexes[MAX_THREADS-15]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-15]],&slenghts[MAX_THREADS-15]);
b0:
conn_fds[conn_indexes[MAX_THREADS-16]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-16]],&slenghts[MAX_THREADS-16]);
goto *labels3[num_conns];
c15:
conn_statuses[conn_indexes[MAX_THREADS- 1]]=CONN_STATUS_READING;
c14:
conn_statuses[conn_indexes[MAX_THREADS- 2]]=CONN_STATUS_READING;
c13:
conn_statuses[conn_indexes[MAX_THREADS- 3]]=CONN_STATUS_READING;
c12:
conn_statuses[conn_indexes[MAX_THREADS- 4]]=CONN_STATUS_READING;
c11:
conn_statuses[conn_indexes[MAX_THREADS- 5]]=CONN_STATUS_READING;
c10:
conn_statuses[conn_indexes[MAX_THREADS- 6]]=CONN_STATUS_READING;
c9:
conn_statuses[conn_indexes[MAX_THREADS- 7]]=CONN_STATUS_READING;
c8:
conn_statuses[conn_indexes[MAX_THREADS- 8]]=CONN_STATUS_READING;
c7:
conn_statuses[conn_indexes[MAX_THREADS- 9]]=CONN_STATUS_READING;
c6:
conn_statuses[conn_indexes[MAX_THREADS-10]]=CONN_STATUS_READING;
c5:
conn_statuses[conn_indexes[MAX_THREADS-11]]=CONN_STATUS_READING;
c4:
conn_statuses[conn_indexes[MAX_THREADS-12]]=CONN_STATUS_READING;
c3:
conn_statuses[conn_indexes[MAX_THREADS-13]]=CONN_STATUS_READING;
c2:
conn_statuses[conn_indexes[MAX_THREADS-14]]=CONN_STATUS_READING;
c1:
conn_statuses[conn_indexes[MAX_THREADS-15]]=CONN_STATUS_READING;
c0:
conn_statuses[conn_indexes[MAX_THREADS-16]]=CONN_STATUS_READING;
}
This is a daemon that processes up to MAX_THREADS connections in parallel. As you can see, I have to unroll the loops by hand. For example, it would be nice to have a syntax like this in a compiler:
LOOP(ITERATOR=1,ITERATOR<MAX_THREADS) {
c{ITERATOR}:
conn_statuses[conn_indexes[MAX_THREADS- {ITERATOR}]]=CONN_STATUS_READING;
}
so that it would unroll the loop MAX_THREADS times and then compile. The c{ITERATOR} is a label to jump to when the number of threads is lower than MAX_THREADS. But such a syntax is difficult to emulate with gcc, and even with m4. CUDA does this by defining a kernel in a function, and I can do the same using an inline function, but there is no easy way to generate lines for 1, 2, 3, 4 … N and substitute the ITERATOR variable. This is the kind of tool I am trying to find, but I haven’t found one yet.