/** So we have to find the error in this code. 
 ** To facilitate this, the code has been extensively
 ** commented. 
 ** Where I have found mistakes or some oddity, 
 ** I have explained it in a comment right before the
 ** mistakes etc occurred
 **
 ** -Keith A. Pray
 ** 
 ** All my comments will follow this form. 
 **/

/** Single Line Comments **/

/** Multiple 
 ** Line
 ** Comments
 **/

/*
 ******************************************************************
 * HISTORY
 * 15-Oct-94  Jeff Shufelt (js), Carnegie Mellon University
 *	Prepared for 15-681, Fall 1994.
 *
 ******************************************************************
 */

#include <stdio.h>
#include <backprop.h>
#include <math.h>

/** absolute value? golly my C is rusty... **/
#define ABS(x)          (((x) > 0.0) ? (x) : (-(x)))

/** copy an array to another array... quickly I would guess
 ** boy is this scary looking...
 **/
#define fastcopy(to,from,len)\
{\
  register char *_to,*_from;\
  register int _i,_l;\
  _to = (char *)(to);\
  _from = (char *)(from);\
  _l = (len);\
  for (_i = 0; _i < _l; _i++) *_to++ = *_from++;\
}

/******************************************************************/

/*** Return random number between 0.0 and 1.0 ***/

double drnd()
{
  return ( (double) random() / (double) BIGRND );
}

/******************************************************************/

/*** Return random number between -1.0 and 1.0 ***/

double dpn1()
{
  return ( ( drnd() * 2.0 ) - 1.0 );
}

/******************************************************************/

/*** The squashing function.  Currently, it's a sigmoid. ***/

/** as in formula 4.12 and illustrated in figure 4.6 from text **/

double squash ( x )
     double x;
{
  /** this formula looks correct **/
  return ( 1.0 / ( 1.0 + exp (-x ) ) );
}

/******************************************************************/

/*** Allocate 1d array of doubles ***/

double *alloc_1d_dbl ( n )
int n;
{
  double *new;

  new = (double *) malloc ( (unsigned) (n * sizeof (double) ) );
  if ( new == NULL )
  {
    printf ( "ALLOC_1D_DBL: Couldn't allocate array of doubles\n" );
    return ( NULL );
  }
  return ( new );

} /** END double *alloc_1d_dbl ( n ) **/

/******************************************************************/

/*** Allocate 2D array of doubles ***/

double **alloc_2d_dbl ( m, n )
int m, n;
{
  int i;
  double **new;

  new = (double **) malloc ( (unsigned) (m * sizeof (double *) ) );
  if ( new == NULL )
  {
    printf ( "ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n" );
    return ( NULL );
  }

  for (i = 0; i < m; i++) 
  {
    new[i] = alloc_1d_dbl ( n );
  }

  return ( new );

} /** END double **alloc_2d_dbl ( m, n ) **/

/******************************************************************/

/** initializes weights in a 2D array to random values 
 ** -1.0 to 1.0
 **/

/** shouldn't these have a return type? **/

bpnn_randomize_weights ( w, m, n )
     double **w;
     int m, n;
{
  int i, j;

  for ( i = 0; i <= m; i++ )
  {
    for ( j = 0; j <= n; j++ )
    {
      w[i][j] = dpn1();
    }
  }

} /** END bpnn_randomize_weights ( w, m, n ) **/

/******************************************************************/

/** initialize a 2D array to 0.0 */

/** shouldn't these have a return type? **/

bpnn_zero_weights ( w, m, n )
     double **w;
     int m, n;
{
  int i, j;

  for ( i = 0; i <= m; i++ )
  {
    for ( j = 0; j <= n; j++ )
    {
      w[i][j] = 0.0;
    }
  }

} /** END bpnn_zero_weights ( w, m, n ) **/

/******************************************************************/

/** Seed the random number generator **/

void bpnn_initialize ( seed )
{
  printf ( "Random number generator seed: %d\n", seed );
  srandom ( seed );

} /** void bpnn_initialize ( seed ) **/

/******************************************************************/

/** Make a new back propagation neural network struct thingy **/

BPNN *bpnn_internal_create ( n_in, n_hidden, n_out )
     int n_in, n_hidden, n_out;
{
  BPNN *newnet;

  newnet = (BPNN *) malloc (sizeof (BPNN) );

  /** check if it worked **/
  if ( newnet == NULL )
  {
    printf ( "BPNN_CREATE: Couldn't allocate neural network\n" );
    return ( NULL );
  }

  newnet->input_n = n_in;
  newnet->hidden_n = n_hidden;
  newnet->output_n = n_out;

  /** allocating 1D arrays for the in, hidden and out units
   ** Why allocate + 1 ? 
   ** Cause the indexing of arrays in this code start from 1.
   **/
  newnet->input_units = alloc_1d_dbl ( n_in + 1 );
  newnet->hidden_units = alloc_1d_dbl ( n_hidden + 1 );
  newnet->output_units = alloc_1d_dbl ( n_out + 1 );

  newnet->hidden_delta = alloc_1d_dbl ( n_hidden + 1 );
  newnet->output_delta = alloc_1d_dbl ( n_out + 1 );
  newnet->target = alloc_1d_dbl ( n_out + 1 );

  newnet->input_weights = alloc_2d_dbl ( n_in + 1, n_hidden + 1 );
  newnet->hidden_weights = alloc_2d_dbl ( n_hidden + 1, n_out + 1 );

  newnet->input_prev_weights = alloc_2d_dbl ( n_in + 1, n_hidden + 1 );
  newnet->hidden_prev_weights = alloc_2d_dbl ( n_hidden + 1, n_out + 1 );

  return ( newnet );

} /** END BPNN *bpnn_internal_create ( n_in, n_hidden, n_out ) **/

/******************************************************************/

void bpnn_free ( net )
     BPNN *net;
{
  int n1, n2, i;

  n1 = net->input_n;
  n2 = net->hidden_n;

  free ( (char *) net->input_units );
  free ( (char *) net->hidden_units );
  free ( (char *) net->output_units );

  free ( (char *) net->hidden_delta );
  free ( (char *) net->output_delta );
  free ( (char *) net->target );

  for ( i = 0; i <= n1; i++ )
  {
    free ( (char *) net->input_weights[i] );
    free ( (char *) net->input_prev_weights[i] );
  }
  free ( (char *) net->input_weights );
  free ( (char *) net->input_prev_weights );

  for ( i = 0; i <= n2; i++ )
  {
    free ( (char *) net->hidden_weights[i] );
    free ( (char *) net->hidden_prev_weights[i] );
  }

  free ( (char *) net->hidden_weights );
  free ( (char *) net->hidden_prev_weights );

  free ( (char *) net );

} /** END void bpnn_free ( net ) **/

/******************************************************************/

/*** Creates a new fully-connected network from scratch,
     with the given numbers of input, hidden, and output units.
     Threshold units are automatically included.  All weights are
     randomly initialized.

     Space is also allocated for temporary storage (momentum weights,
     error computations, etc).
***/

BPNN *bpnn_create ( n_in, n_hidden, n_out )
     int n_in, n_hidden, n_out;
{

  BPNN *newnet;

  newnet = bpnn_internal_create ( n_in, n_hidden, n_out );

#ifdef INITZERO
  bpnn_zero_weights ( newnet->input_weights, n_in, n_hidden );
#else
  bpnn_randomize_weights ( newnet->input_weights, n_in, n_hidden );
#endif
  bpnn_randomize_weights ( newnet->hidden_weights, n_hidden, n_out );
  bpnn_zero_weights ( newnet->input_prev_weights, n_in, n_hidden );
  bpnn_zero_weights ( newnet->hidden_prev_weights, n_hidden, n_out );

  return ( newnet );

} /** END BPNN *bpnn_create ( n_in, n_hidden, n_out ) **/

/******************************************************************/

void bpnn_layerforward ( l1, l2, conn, n1, n2 )
     double *l1, *l2, **conn;
     int n1, n2;
{
  double sum;
  int j, k;

  /*** Set up thresholding unit ***/
  l1[0] = 1.0;

  /*** For each unit in second layer ***/
  for ( j = 1; j <= n2; j++ )
  {
    /*** Compute weighted sum of its inputs ***/
    sum = 0.0;
    
    /** for each unit in layer 1 **/
    for ( k = 0; k <= n1; k++ ) 
    {
      /** corresponds to Figure 4.6, sum of weighted inputs to
       ** unit. conn[k][j] contains the weight of the connection
       ** from the j-th first layer unit to the k-th second
       ** layer unit. l1[k] contains the input from the k-th 
       ** unit from layer 1.
       **/
      sum += conn[k][j] * l1[k];
    }

    /** this computes the sigma as in Formula 4.12 (Figure 4.6) **/
    l2[j] = squash ( sum );
  }

} /** END void bpnn_layerforward ( l1, l2, conn, n1, n2 ) **/

/******************************************************************/

/** Calculates the error for the output units **/ 

void bpnn_output_error ( delta, target, output, nj, err )
     double *delta, *target, *output, *err;
     int nj;
{
  int j;
  double o, t, errsum;

  /** clean slate,no errors yet **/
  errsum = 0.0;

  /** for each output unit **/
  for ( j = 1; j <= nj; j++ )
  {
    /** get the output value **/
    o = output[j];

    /** get the target value for this **/
    t = target[j];

    /** Formula T4.3 from Table 4.2 in text.
     ** and to the formula 4.26 in the text
     ** -(t_j - o_j) o_j (1 - o_j)
     ** Which is part of the stochastic gradient descent rule
     ** for output units.
     **/
    delta[j] = o * ( 1.0 - o ) * ( t - o );

    /** sum up the error to get ready for calculating
     ** the error for the hidden units.
     ** Note: In the Table 4.2 it shows this sum as
     ** sum ( delta[j] * weight[kh] ) where h is the hidden unit
     ** and weight[kh] is the weight of the 
     ** line hidden -> output unit.
     ** but this sum does not include the weight factor.
     ** Since the error is summarized here into a single value,
     ** check carefully that the bpnn_hidden_error function
     ** somehow accounts for this. 
     ** ...
     ** Ok, in the error function for the hidden units,
     ** the delta[j] from here is being used along with the 
     ** weight_kh. The error sum being calculated here is simply
     ** for reporting purposes. Possibly used by a driver that
     ** uses this error for determining when to stop the 
     ** learning process. I guess I am easy to confuse...
     **/
    errsum += ABS ( delta[j] );
  }

  *err = errsum;

} /** END void bpnn_output_error ( delta, target, output, nj, err ) **/

/******************************************************************/

/** Calculates the error for the hidden units **/

void bpnn_hidden_error ( delta_h, nh, delta_o, no, who, hidden, err )
double *delta_h, *delta_o, *hidden, **who, *err;
int nh, no;

/** who is the array of weights for the hidden units **/

{
  int j, k;
  double h, sum, errsum;

  /** start with no error **/
  errsum = 0.0;

  /** for each hidden unit **/
  for (j = 1; j <= nh; j++) 
  {
    /** get the hidden unit value **/
    h = hidden[j];
  
    /** init the sum 
    sum = 0.0;
    
    /** for each output unit **/
    for (k = 1; k <= no; k++) 
    {
      /** It looks like something is wrong here.
       ** Instead of using the error term for the output unit,
       ** the delta k of the output is being used.
       **/
      sum += delta_o[k] * who[j][k];
    }
    
    /** This corresponds to T4.4 in Table 4.2 **/
    delta_h[j] = h * (1.0 - h) * sum;

    /** maintain the error sum for reporting **/
    errsum += ABS(delta_h[j]);
  }
  
  *err = errsum;

} /** END void bpnn_hidden_error ( delta_h, nh, delta_o, no, 
   **				   who, hidden, err ) 
   **/

/******************************************************************/

/** Adjusts the weights for the given units **/

void bpnn_adjust_weights ( delta, ndelta, ly, nly, w, oldw, eta, momentum )
double *delta, *ly, **w, **oldw, eta, momentum;
/** delta	: the delta calculated in error functions
 ** ndelta	: number of deltas - should be same as input units
 ** ly		: the input (un-weighted)
 ** nly		: number of inputs
 ** w		: the weights (current) to be adjusted
 ** oldw	: the old weights
 ** eta		: learning rate
 ** momentum	: influence (weight) factor of previous weights
 **/
{
  double new_dw;
  int k, j;

  /** So, this is odd. Since most of the arrays are indexed
   ** starting from 1.
   ** So the loop for each input starts at 0...
   ** So there is an extra weight in w which doesn't seem to 
   ** correspond to any unit.
   **/
  ly[0] = 1.0;
  
  /** for each delta value **/
  for ( j = 1; j <= ndelta; j++ )
  {
    /** for each input **/
    for ( k = 0; k <= nly; k++ )
    {
      /** calculates the delta to be used to update the weight
       ** the first part ( eta * delta[j] * ly[k] )
       ** corresponds to T4.5 from Table 4.2
       ** with the second part, the whole statement below
       ** corresponds to Equation 4.18 in the text.
       **/
      new_dw = ( ( eta * delta[j] * ly[k] ) + ( momentum * oldw[k][j] ) );

      /** add the delta weight to the current weight **/
      w[k][j] += new_dw;

      /** save this weight for later use in momentum **/
      oldw[k][j] = new_dw;
    }
  }
} /** END void bpnn_adjust_weights ( delta, ndelta, ly, nly, w, oldw, 
   **				     eta, momentum )
   **/

/******************************************************************/

void bpnn_feedforward ( net )
     BPNN *net;
{
  int in, hid, out;

  in = net->input_n;
  hid = net->hidden_n;
  out = net->output_n;

  /*** Feed forward input activations. ***/
  bpnn_layerforward ( net->input_units, net->hidden_units,
		      net->input_weights, in, hid );

  bpnn_layerforward ( net->hidden_units, net->output_units,
		      net->hidden_weights, hid, out );

}

/******************************************************************/

/** Trains the neural network **/

void bpnn_train ( net, eta, momentum, eo, eh )
     BPNN *net;
     double eta, momentum, *eo, *eh;
{
  int in, hid, out;
  double out_err, hid_err;
  
  in = net->input_n;
  hid = net->hidden_n;
  out = net->output_n;

  /*** Feed forward input activations. ***/
  bpnn_layerforward ( net->input_units, net->hidden_units,
		      net->input_weights, in, hid );

  bpnn_layerforward ( net->hidden_units, net->output_units,
		      net->hidden_weights, hid, out );

  /*** Compute error on output and hidden units. ***/
  bpnn_output_error ( net->output_delta, net->target, net->output_units,
		      out, &out_err );

  bpnn_hidden_error ( net->hidden_delta, hid, net->output_delta, out,
		      net->hidden_weights, net->hidden_units, &hid_err );
  *eo = out_err;
  *eh = hid_err;

  /*** Adjust input and hidden weights. ***/

  bpnn_adjust_weights ( net->output_delta, out, net->hidden_units, hid,
			net->hidden_weights, net->hidden_prev_weights, 
			eta, momentum );

  bpnn_adjust_weights ( net->hidden_delta, hid, net->input_units, in,
			net->input_weights, net->input_prev_weights, 
			eta, momentum );

} /** END void bpnn_train ( net, eta, momentum, eo, eh ) **/

/******************************************************************/

/** Saves a neural network to a file for later retrieval **/

void bpnn_save ( net, filename )
     BPNN *net;
     char *filename;
{
  int fd, n1, n2, n3, i, j, memcnt;
  double dvalue, **w;
  char *mem;

  if ( ( fd = creat ( filename, 0644 ) ) == -1 ) 
  {
    printf ( "BPNN_SAVE: Cannot create '%s'\n", filename );
    return;
  }

  n1 = net->input_n;  n2 = net->hidden_n;  n3 = net->output_n;

  printf ( "Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename );
  fflush ( stdout );

  write ( fd, (char *) &n1, sizeof(int) );
  write ( fd, (char *) &n2, sizeof(int) );
  write ( fd, (char *) &n3, sizeof(int) );

  memcnt = 0;
  w = net->input_weights;
  mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(double)));
  for ( i = 0; i <= n1; i++ ) 
  {
    for ( j = 0; j <= n2; j++ ) 
    {
      dvalue = w[i][j];
      fastcopy ( &mem[memcnt], &dvalue, sizeof(double) );

      memcnt += sizeof(double);
    }
  }
  write ( fd, mem, (n1+1) * (n2+1) * sizeof(double) );
  free ( mem ) ;

  memcnt = 0;
  w = net->hidden_weights;
  mem = (char *) malloc ((unsigned) ((n2+1) * (n3+1) * sizeof(double)));
  for (i = 0; i <= n2; i++) 
  {
    for (j = 0; j <= n3; j++) 
    {
      dvalue = w[i][j];
      fastcopy(&mem[memcnt], &dvalue, sizeof(double));
      memcnt += sizeof(double);
    }
  }
  write(fd, mem, (n2+1) * (n3+1) * sizeof(double));
  free ( mem );

  close ( fd );
  return;

} /** END void bpnn_save ( net, filename ) **/

/******************************************************************/

/** Reads in a neural network from a file **/

BPNN *bpnn_read ( filename )
     char *filename;
{
  char *mem;
  BPNN *new;
  int fd, n1, n2, n3, i, j, memcnt;

  if ( ( fd = open ( filename, 0, 0644 ) ) == -1 ) 
  {
    return ( NULL );
  }

  printf ( "Reading '%s'\n", filename );  
  fflush ( stdout );

  read ( fd, (char *) &n1, sizeof(int) );
  read ( fd, (char *) &n2, sizeof(int) );
  read ( fd, (char *) &n3, sizeof(int) );
  new = bpnn_internal_create ( n1, n2, n3 );

  printf ( "'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3 );
  printf ( "Reading input weights..." );  
  fflush ( stdout );

  memcnt = 0;
  mem = (char *) malloc ((unsigned) ((n1+1) * (n2+1) * sizeof(double)));
  read(fd, mem, (n1+1) * (n2+1) * sizeof(double));
  for ( i = 0; i <= n1; i++ )
  {
    for ( j = 0; j <= n2; j++ ) 
    {
      fastcopy ( &( new->input_weights[i][j] ), &mem[memcnt], sizeof(double) );
      memcnt += sizeof(double);
    }
  }
  free ( mem );

  printf ( "Done\nReading hidden weights..." );
  fflush ( stdout );

  memcnt = 0;
  mem = (char *) malloc ((unsigned) ( (n2+1) * (n3+1) * sizeof(double) ) );
  read ( fd, mem, (n2+1) * (n3+1) * sizeof(double) );
  for ( i = 0; i <= n2; i++ )
  {
    for ( j = 0; j <= n3; j++ ) 
    {
      fastcopy ( &( new->hidden_weights[i][j] ), 
		 &mem[memcnt], sizeof(double) );
      memcnt += sizeof(double);
    }
  }
  free ( mem );
  close ( fd );

  printf ( "Done\n" );
  fflush(stdout);

  bpnn_zero_weights ( new->input_prev_weights, n1, n2 );
  bpnn_zero_weights ( new->hidden_prev_weights, n2, n3 );

  return ( new );

} /** END BPNN *bpnn_read(filename) **/

/******************************************************************/