Source

benchmark_rnn / rnn_opt4.c

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * State and parameters of the recurrent network used by RNN_init() and
 * RNN_fptt().  The struct only holds pointers; the caller owns the storage
 * they refer to.
 */
typedef struct __RNN__{
  int num_c;     /* number of units; bound of every "c" index below */
  int num_s;     /* number of steps; bound of every "s" index below */
  double **wcc;  /* wcc[c0][c1]: weight table, num_c rows of num_c entries */
  double *bc;    /* bc[c]: per-unit additive term */
  double *ec;    /* ec[c]: per-unit blend factor between old and new state */
  double **uc;   /* uc[s][c]: pre-activation state, num_s rows of num_c */
  double **xc;   /* xc[s][c]: tanh(uc[s][c]), num_s rows of num_c */
} RNN;

/*
 * Shorthand accessors for the arrays hanging off an RNN.
 * Every macro expands to an lvalue and silently uses a pointer named `self`
 * that must be in scope at the point of use.
 */

/* Element accessors. */
#define Wcc(i,j) (self->wcc[(i)][(j)])
#define Bc(i)    (self->bc[(i)])
#define Ec(i)    (self->ec[(i)])
#define Uc(i,j)  (self->uc[(i)][(j)])
#define Xc(i,j)  (self->xc[(i)][(j)])

/* Whole-row accessors: yield the row pointer rather than one element. */
#define VWcc(i)  (self->wcc[(i)])
#define VXc(i)   (self->xc[(i)])

/*
 * Dot product of the first `num` entries of two double arrays.
 *
 * Products are accumulated front to back into a single running sum that
 * starts at zero, so the result is 0 when `num` is zero or negative.
 * Both arrays are only read.
 */
static inline double
ddot(const double * restrict array1, const double * restrict array2,
     const int num)
{
  double sum = 0.0;
  int k = 0;

  while (k < num) {
    sum += array1[k] * array2[k];
    ++k;
  }
  return sum;
}

/*
 * Run the network forward over all num_s steps, filling uc and xc.
 *
 * Row 0 of uc is taken as given; row 0 of xc is set to tanh of it.  For each
 * later step s and unit c:
 *
 *   uc[s][c] = (1 - ec[c]) * uc[s-1][c]
 *            + ec[c] * (bc[c] + dot(wcc[c], xc[s-1]))
 *   xc[s][c] = tanh(uc[s][c])
 *
 * wcc, bc and ec are only read.  Does nothing when num_s < 1.
 */
void RNN_fptt(RNN *self)
{
  const int num_c = self->num_c;
  const int num_s = self->num_s;
  int s0, c0;

  /* With no steps there is no row 0; touching it would be out of bounds. */
  if (num_s < 1) return;

  for (c0 = 0; c0 < num_c; ++c0){
    Xc(0,c0) = tanh(Uc(0,c0));
  }
  for (s0 = 1; s0 < num_s; ++s0){
    /* Row pointers are invariant across the inner loop; fetch them once. */
    const double *u_prev = self->uc[s0-1];
    const double *x_prev = VXc(s0-1);
    double *u_cur = self->uc[s0];
    double *x_cur = VXc(s0);

    for (c0 = 0; c0 < num_c; ++c0){
      const double e = Ec(c0);

      /* Blend the previous state with the new drive, weighted by e. */
      u_cur[c0] = (1 - e) * u_prev[c0]
        + e * ( Bc(c0) + ddot(VWcc(c0), x_prev, num_c) );
      x_cur[c0] = tanh(u_cur[c0]);
    }
  }
}

/*
 * Fill the network with the fixed starting values used by the benchmark:
 * every wcc entry, every ec entry and every bc entry becomes 0.1, and row 0
 * of uc and xc is cleared to 0.
 *
 * num_c and num_s must already be set and the arrays must already point at
 * storage of matching size.  Rows 1.. of uc and xc are not touched.  When
 * num_s < 1 there is no row 0, so the state arrays are left alone.
 */
void RNN_init(RNN *self)
{
  const int has_state_row = self->num_s > 0;
  int c0, c1;

  for (c0 = 0; c0 < self->num_c; ++c0){
    for (c1 = 0; c1 < self->num_c; ++c1){
      Wcc(c0,c1) = 0.1;
    }
    Ec(c0) = 0.1;
    Bc(c0) = 0.1;
    /* Only clear the initial state if the state arrays have a first row. */
    if (has_state_row){
      Uc(0,c0) = 0;
      Xc(0,c0) = 0;
    }
  }
}

/*
 * Parse `text` as a base-10 integer in the range [min_value, INT_MAX].
 * Returns 0 and stores the value in *out on success.  Returns -1, leaving
 * *out untouched, when `text` holds no digits, has trailing characters, or
 * is out of range.
 */
static int
parse_int_arg(const char *text, int min_value, int *out)
{
  char *end;
  long value;

  errno = 0;
  value = strtol(text, &end, 10);
  if (end == text || *end != '\0' || errno == ERANGE) return -1;
  if (value < min_value || value > INT_MAX) return -1;
  *out = (int)value;
  return 0;
}

/*
 * Benchmark driver.
 *
 * Usage: prog num_c num_s repeat_times
 *
 * Builds an RNN with num_c units and num_s steps, initialises it with
 * RNN_init(), then calls RNN_fptt() repeat_times times.  Returns 0 on
 * success and EXIT_FAILURE on bad arguments or allocation failure.
 */
int main(int argc, char *argv[])
{
  const char *prog = (argc > 0 && argv[0]) ? argv[0] : "rnn";
  int c, s, i;
  int nc, ns, repeat_times;
  int status = EXIT_FAILURE;
  RNN rnn;

  if (argc < 4){
    fprintf(stderr, "usage: %s num_c num_s repeat_times\n", prog);
    return EXIT_FAILURE;
  }
  /* Sizes must be positive: they become array dimensions below. */
  if (parse_int_arg(argv[1], 1, &nc) != 0
      || parse_int_arg(argv[2], 1, &ns) != 0
      || parse_int_arg(argv[3], 0, &repeat_times) != 0){
    fprintf(stderr, "%s: num_c and num_s must be positive integers and "
            "repeat_times a non-negative integer\n", prog);
    return EXIT_FAILURE;
  }

  /* Matrices live on the heap, one contiguous slab each, so large sizes do
     not overflow the stack.  calloc also checks count * size for overflow. */
  const size_t row_bytes = sizeof(double[nc]);
  double (*wcc)[nc] = calloc((size_t)nc, row_bytes);
  double (*uc)[nc]  = calloc((size_t)ns, row_bytes);
  double (*xc)[nc]  = calloc((size_t)ns, row_bytes);
  double *bc    = calloc((size_t)nc, sizeof *bc);
  double *ec    = calloc((size_t)nc, sizeof *ec);
  double **pwcc = calloc((size_t)nc, sizeof *pwcc);
  double **puc  = calloc((size_t)ns, sizeof *puc);
  double **pxc  = calloc((size_t)ns, sizeof *pxc);

  if (wcc && uc && xc && bc && ec && pwcc && puc && pxc){
    rnn.num_s = ns;
    rnn.num_c = nc;
    rnn.ec  = ec;
    rnn.bc  = bc;
    rnn.wcc = pwcc;
    rnn.uc  = puc;
    rnn.xc  = pxc;
    /* Point each row-pointer table at the rows of its contiguous slab. */
    for (c = 0; c < nc; ++c) rnn.wcc[c] = wcc[c];
    for (s = 0; s < ns; ++s) rnn.uc[s] = uc[s];
    for (s = 0; s < ns; ++s) rnn.xc[s] = xc[s];

    RNN_init(&rnn);
    for (i = 0; i < repeat_times; ++i){
      RNN_fptt(&rnn);
    }
    status = EXIT_SUCCESS;
  } else {
    fprintf(stderr, "%s: out of memory\n", prog);
  }

  /* free(NULL) is a no-op, so this is safe after a partial failure. */
  free(pxc);
  free(puc);
  free(pwcc);
  free(ec);
  free(bc);
  free(xc);
  free(uc);
  free(wcc);

  return status;
}