/* backprop/backprop4.c */
/*
   Minor changes last made 28 05 94

   Changes worth making: in classifier, make an output log and antilog 
   array and calculate at feedforward time, rather than error signal time. 
*/

#include "src/r.h"
#include "src/mynr.h"
#include "backprop4.h"

FILE    *fopen(); 

extern	int 	control[] , **controlpointer , ***wn ;
extern	double	aux[] ;
extern	double	**auxpointer , **inputs , **targets ;
extern  float   **image ;
extern	char	files[10][50];  

void load_activities(number)
     int	number;
{
  int	i , layer , k , l , col , row;
  float *p ;
  
  layer=INPUTLAYER;
  i=1;
  if(!DATA_FROM_IM){
    for(i=1;i<=NUMBERIN_(layer);i++){
      XACT(i,layer)=inputs[number][i];}
  } else {
    number -= C_IM_P[IMEXOFFSET] ; /* Offset for train / test1 / test2 */
    row = ( number / C_IM_P[IMWIDTH] ) * C_IM_P[IMSPA] ;
    col = ( number % C_IM_P[IMWIDTH] ) * C_IM_P[IMSPA] ;
    p = CURRENT_IM + col + row * C_IM_P[IMNX] ;
    for ( k = 0 ; ( k < OY ) ; k++ ) { /* write all lines */
      for ( l = 0 ; l < OX ; l++,i++ ) {
	XACT(i,layer)= p[l]  ;
      }
      p += C_IM_P[IMNX];
    }
  }
}

void active_image(num)
int num;
{
  switch(num){
    case(1): CURRENT_IM = TRAIN_IMAGE; C_IM_P = TRAIN_PATCH; break;
    case(2): CURRENT_IM = TEST1_IMAGE; C_IM_P = TEST1_PATCH; break;
    case(3):default: CURRENT_IM = TEST2_IMAGE; C_IM_P = TEST2_PATCH; break;
  }
}

int set_up_image(file_spr,file_sdt,im_pp,con_p)
     char *file_spr,*file_sdt;
     float **im_pp;
     int *con_p;
{
  
  FILE   *spr_fp;
  int i,np;
  unsigned char *iimage;
  char junk[50];
  
  spr_fp = fopen(file_spr,"r"); 
  fscanf( spr_fp, "%s", junk );  
  fscanf( spr_fp,"%d", &con_p[IMNX]);
  fscanf( spr_fp, "%s", junk );  
  fscanf( spr_fp, "%s", junk );  
  fscanf( spr_fp,"%d", &con_p[IMNY]);
  fclose( spr_fp ); 
  np = con_p[IMNX] * con_p[IMNY] ; con_p[IMNP] = np;
  iimage = malloce(np);
  readb(file_sdt, iimage, np); 
  con_p[IMOFF] = con_p[IMXLO] + con_p[IMNX] * con_p[IMYLO] ; 
  iimage +=    con_p[IMOFF] ;     /* align image with first pixel in patch */
  np -= con_p[IMOFF] ;
  im_pp[0] = malloce(np*sizeof(float));
  
  for (i=0; i < np; i++)
    im_pp[0][i] = (float)iimage[i] / IMSCALE - IMSUBTRACT ;
  
  free(iimage);

  if( con_p[IMNX] - 1 < con_p[IMXHI] + OX ) {
    printf ("Warning: image patch too wide\n" ) ;
    con_p[IMXHI] = con_p[IMNX] - 1 - OX ;
  }
  if( con_p[IMNY] - 1 < con_p[IMYHI] + OY ) {
    printf ("Warning: image patch too tall\n" ) ;
    con_p[IMYHI] = con_p[IMNY] - 1 - OY ;
  }

  con_p[IMNUM] = ( ( con_p[IMXHI] - con_p[IMXLO] ) / con_p[IMSPA] + 1 ) *  
    ( ( con_p[IMYHI] - con_p[IMYLO] ) / con_p[IMSPA] + 1 ) ;
  fprintf( stderr , "%s Number of examples = %d \n" , file_sdt , con_p[IMNUM]
	  ) ; 
  con_p[IMWIDTH] = ( con_p[IMXHI] - con_p[IMXLO] ) / con_p[IMSPA] + 1 ;

  return ( con_p[IMNUM] ) ;
}

void image_to_targets(file_sdt,con_p,i,max)
     char *file_sdt ;
     int *con_p , i , max ;
{

  int np;
  unsigned char *clean;
  int	layer,k,l,col,row,value;
  
  np = con_p[IMNP] ;
  clean = malloce(np);
  readb(file_sdt, clean, np);
  clean += con_p[IMOFF] ; /* align image with first pixel in patch */
  
  layer=INPUTLAYER;
  for( k = 0  ; i <= max ; i ++ , k ++ ) {
    row = ( k / C_IM_P[IMWIDTH] ) * C_IM_P[IMSPA] ;
    col = ( k % C_IM_P[IMWIDTH] ) * C_IM_P[IMSPA] ;
    value = clean[ ( col + OX / 2 ) + ( row + OX / 2 ) * C_IM_P[IMNX] ] ;
    targets[i][1] =  IMBINOUT ? ( value > IMCLIP ) : value ;
  }
  free(clean);
}

void load_activities2(x,y)
	double	x,y;
{

	int	i,layer;

	layer=INPUTLAYER;
	i=1;
	XACT(i,layer)=x;
	if(NUMBERIN_(layer)>1){
		i++;
		XACT(i,layer)=y;
	}
}

void display_xs(layer,newln)
	int	layer,newln;
{

	int	i;

	for(i=1;i<=NUMBERIN_(layer);i++)
		printf("%4g	",XACT(i,layer));
	if(newln)printf("\n");
}

void display_ds()
{

	int	i,layer;

	layer=HIDDENLAYER;  
	for(i=1;i<=NUMBERIN_(layer);i++)
		printf("%4g ",DELTA(i,layer));
	printf(":	");
	layer=OUTPUTLAYER;
	for(i=1;i<=NUMBERIN_(layer);i++)
		printf("%4g ",DELTA(i,layer));
	printf("\n");
}

void print_weights(weight,style)
	int	style;
	double	*weight;
{

	printf("First layer:	%d -	",NWB4O-1); 
	pdv(weight,1,NWB4O-1,style); newline();
	printf("Second layer:	%d -	",WEIGHTNUMBER-NWB4O+1); 
	pdv(weight,NWB4O,WEIGHTNUMBER,style); newline();
}

void print_params(weight,style)
	int	style;
	double	*weight;      
{

	int	i;

	printf("%d : %d : %d --- %d weights \n",INPUTN,HIDDENN,OUTPUTN,WEIGHTNUMBER);
	printf("SIGMA_W's:"); 
	for(i=1;i<=REGS;i++){
		printf("%g	",V_SIGMA_W(i));
	}
	printf("\n");
	pause_for_return();
}

double	act_fun(number,act,anti)
	int 	number;
	double	act,*anti;
{
  double tmpd ;

  switch(number){
  case(1):
    return tanh(act);
    break;
  case(2):
    /*		anti[0]=0.5*(1.0+tanh(-act));
		return 0.5*(1.0+tanh(act));*/
    anti[0]=1.0/(1.0+exp(2.0*act));
    return 1.0/(1.0+exp(-2.0*act));
    break;
  case(3):
  case(10):/* 10 is the choice of af for classification problems'
	      output units. */
    anti[0]=1.0/(1.0+exp(act));
    return 1.0/(1.0+exp(-act));
    break;
  case(4):
    anti[0] = 1.0/(1.0+exp(-act)); /* this is the derivative */
    return ( log ( 1.0 + exp(act) ) ) ; /* ___/       */
    break;
#define BETAFIVE 100.0
  case(5):
    anti[0] = 1.0/(1.0+exp(- BETAFIVE * act)); 
    /* this is the derivative */
    return ( log ( 1.0 + exp(BETAFIVE * act) ) / BETAFIVE ) ; 
    /* ___/  sharper corner       */
    break;
  default:
  case(0):
    return	act;
    break;
  }
}

double	d_act_fun(number,act,xact,antixact)
	int 	number;
	double	act,xact,antixact;
{
	switch(number){
	case(1):
		return	1.0-xact*xact;
		break;
	case(2):
		return 2.0*(xact)*(antixact); /* check this */
		break;
	case(3):
		return (xact)*(antixact); /* check this */
		break;
	      case(4):
	      case(5):
		return antixact ;
		break;
	case(0):
	case(10):	/* 10 is the choice of af for classification problems'
				output units. It is a 0/1 sigmoid going forward
				but going backward it is linear because 
				of the G-info objective function */
	default:
		return	1.0;
		break;
	}
}

void forward_pass(weight)
     double	*weight;
{

  int	i,j,layer;	
  
  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++){
    for(i=1;i<=NUMBERIN_(layer);i++){
      AACT(i,layer)=0.0;
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	AACT(i,layer)+=weight[wn[i][j][layer]]*XACT(j,layer-1);
      }
      XACT(i,layer)=act_fun(AF_(layer),AACT(i,layer),&AXACT(i,layer)); /* allow the act_fun to write the anti-x-activity in if it wants */
    }
  }
}

void full_forward_pass(weight) /* makes use of full connectivity */
     double	*weight;
{

  int	i,j,layer;	
  double act , *x ; 
  
  weight++ ;
  for ( layer = HIDDENLAYER ; layer <= OUTPUTLAYER ; layer ++ ) {
    for ( i = 1 ; i <= NUMBERIN_(layer) ; i ++ ) {
      act = 0.0 ;
      for ( j = 0 , x = &XACT(j,layer-1) ; 
	   j <= NUMBERIN_(layer-1) ; 
	   j ++ , x++ , weight++) {
	act += (*weight) *  (*x); 
      }
      AACT(i,layer) = act ;
      XACT(i,layer)=act_fun(AF_(layer), act , &AXACT(i,layer)); /* allow the act_fun to write the anti-x-activity in if it wants */
    }
  }
}

double safelog ( xx ) 
     double xx ;
{
  if ( xx > 0.0 ) return  log ( xx ) ; 
  else {  printf ( "l! ") ; if(VERBOSE>=2) pause_for_return() ; return -30.0 ; }
}

double error_signal(number)
	int	number;
{
  double	energy=0.0,antixact,xact , thiserr;
  int	i,j,layer;	

  layer=OUTPUTLAYER;
  for( i = 1 ; i <= NUMBERIN_( layer ) ; i++ ) {
    if(!CLASSIFIER){ 
      thiserr = ERROR(i,layer)=XACT(i,layer)-targets[number][i];
      energy+=(thiserr * thiserr);
    }
    else{
      antixact = AXACT( i , layer ) ; 
      xact = XACT( i , layer ) ; 
      if( targets[number][i] == 1.0 ) {
	ERROR( i , layer ) = ( -antixact ) ;
	energy -= safelog( xact ) ;
	if ( xact >= CLASSTHRESHOLD ) L_CORRECT ++ ;
      } else {
	ERROR( i , layer ) = xact ; 
	energy -=  safelog( antixact ) ;
	if ( xact < CLASSTHRESHOLD ) L_CORRECT ++ ;
      }
    }
    DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
  }
  if (!CLASSIFIER){  energy *= 0.5 ; }
  return	energy; 
}

/* see log_like for log predictive error */

void add_noise_to_outputs(weight)
	double	*weight;
{
	int	i,j,layer;	
	double	junk;

	layer=OUTPUTLAYER;
	for(i=1;i<=NUMBERIN_(layer);i++){
		junk=gaussian(SIGMA_NU);
		XACT(i,layer)+=junk;
		AXACT(i,layer)-=junk;
	}
}

double sum_curv_outputs()
{
	int	i,j,layer;	
	double	sco=0.0,antixact;

	layer=OUTPUTLAYER;
	for(i=1;i<=NUMBERIN_(layer);i++){
		antixact=AXACT(i,layer); /* such that XACT = 1-antixact */ 
		sco+=XACT(i,layer)*antixact;
	}
	return (sco*4.0);
}

void add_error_bars_to_outputs(weight,quant)
	double	*weight,quant;
{
	int	i,layer;	

	layer=OUTPUTLAYER;
	for(i=1;i<=NUMBERIN_(layer);i++){
		XACT(i,layer)+=ERRORBARS[i]*quant;
	}
}

void 	write_residuals_to(fp,layer,newln)
	int	layer,newln;
	FILE	*fp;
{
	int	i;	

	for(i=1;i<=NUMBERIN_(layer);i++){
		fprintf(fp,"%g	",ERROR(i,layer));
	}
	if(newln)fnewline;
}

void 	write_activities_to(fp,layer,newln)
	int	layer,newln;
	FILE	*fp;
{
	int	i;	

	for(i=1;i<=NUMBERIN_(layer);i++){
		fprintf(fp,"%9f ",XACT(i,layer)); /* changed to %f for mathematica, 25 sep 91 */
	}
	if(newln)fnewline;
}

void 	write_ebs_to(fp,newln)
	int	newln;
	FILE	*fp;
{
	int	i,layer;	

	layer=OUTPUTLAYER;
	if(VERBOSE==2)printf("num in layer = %d  eb[1] = %f \n",NUMBERIN_(layer),ERRORBARS[1]);
	for(i=1;i<=NUMBERIN_(layer);i++){
		if(VERBOSE==2)printf("Writing error bar %d\n",i);
		if(GEN_EB1OR2==1)fprintf(fp,"%9f ",ERRORBARS[i]*EBFACTOR); /* changed to %f for mathematica, 25 sep 91 */
		else if(GEN_EB1OR2==2) fprintf(fp,"%9f ",ERRORBARS2[i]*EBFACTOR);
		else fprintf(fp,"%9f ",0.5*log(1.0+ERRORBARS2[i]*EBFACTOR));
	}
	if(newln)fnewline;
}

void back_pass0(weight)
     double	*weight;
{

  int	i,j,layer;	
  for(layer=OUTPUTLAYER-1;layer>=INPUTLAYER+1;layer--){ 
    /* note that only a single step is made; 
       there is no need to evaluate deltas at the input layer, unless we are going 
       to fantasy descent in the input space */
    for(j=1;j<=NUMBERIN_(layer);j++){
      ERROR(j,layer)=0.0;
      for(i=1;i<=NUMBERIN_(layer+1);i++){
	ERROR(j,layer)+=WEIGHT(i,j,layer+1)*DELTA(i,layer+1);
      }
      DELTA(j,layer)=d_act_fun(AF_(layer),AACT(j,layer),
			       XACT(j,layer),AXACT(j,layer))
	*ERROR(j,layer);
    }
  }
}

void full_back_pass(weight) /* assumes full connectivity */ 
     double	*weight;
{
  double *cw , delta ;
  int	i,j,layer;	
  for(layer=OUTPUTLAYER-1;layer>=INPUTLAYER+1;layer--){ 
    /* note that only a single step is made; 
       there is no need to evaluate deltas at the input layer, unless we are going 
       to fantasy descent in the input space */
    for( j = 1 ; j <= NUMBERIN_(layer) ; j ++ ) {
      ERROR( j , layer ) = 0.0 ;
    }
    for( i = 1 ; i <= NUMBERIN_(layer+1) ; i ++ ) {
      delta = DELTA( i , layer+1);
      for( j=1 , cw = &weight[wn[i][j][layer+1]] ; 
	  j <= NUMBERIN_(layer) ; 
	  j ++ , cw ++ ) {
	ERROR(j,layer) += *cw * delta ;
      }
    }
    for( j = 1 ; j <= NUMBERIN_(layer) ; j ++ ) {
      DELTA(j,layer)=d_act_fun(AF_(layer),AACT(j,layer),
			       XACT(j,layer),AXACT(j,layer))
	*ERROR(j,layer);
    }
  }
}

void back_to_inputs(weight)
     double	*weight;
{

  int	i,j,layer;	
  layer=INPUTLAYER;
  for(j=1;j<=NUMBERIN_(layer);j++){
    ERROR(j,layer)=0.0;
    for(i=1;i<=NUMBERIN_(layer+1);i++){
      ERROR(j,layer)+=WEIGHT(i,j,layer+1)*DELTA(i,layer+1);
    }
  }
}

void multi_back_pass(weight) /* write me! */
	double	*weight;
{

	int	i,j,layer;	
	for(layer=HIDDENLAYER;layer>=HIDDENLAYER;layer--){ /* note that only a single step is made; 
			there is no need to evaluate deltas at the input layer, unless we are going 
			to fantasy descent in the input space */
		for(j=1;j<=NUMBERIN_(layer);j++){
			ERROR(j,layer)=0.0;
			for(i=1;i<=NUMBERIN_(layer+1);i++){
				ERROR(j,layer)+=WEIGHT(i,j,layer+1)*DELTA(i,layer+1);
			}
			DELTA(j,layer)=d_act_fun(AF_(layer),AACT(j,layer),XACT(j,layer),AXACT(j,layer))*ERROR(j,layer);
		}
	}
}

void inc_gradient(weight,dweight)
     double	*weight,*dweight;
{

  int	i,j,layer;	

  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++){
    for(i=1;i<=NUMBERIN_(layer);i++){
      for(j=0;j<=NUMBERIN_(layer-1);j++ ){
	dweight[wn[i][j][layer]]+=DELTA(i,layer)*XACT(j,layer-1); 
	/* used to divide by var_nu here */
      }
    }
  }
}

void full_inc_gradient(weight,dweight)
     double	*weight,*dweight;
{

  int	i,j,layer;	
  double delta , *x , *cd ; 

  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++){
    cd = &dweight[ wn[1][0][layer] ] ; 
    for(i=1;i<=NUMBERIN_(layer);i++){
      delta = DELTA(i,layer) ;
      for( j = 0 , x = &XACT(j,layer-1) ;
	  j<=NUMBERIN_(layer-1);
	  j++ , cd ++ , x ++ ){
	*cd += delta * (*x) ; 
      }
    }
  }
}

void set_gradient(weight,dweight)
	double	*weight,*dweight;
{

	int	i,j,layer;	

	for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++)
		for(i=1;i<=NUMBERIN_(layer);i++)
			for(j=0;j<=NUMBERIN_(layer-1);j++)
				dweight[wn[i][j][layer]]=DELTA(i,layer)*XACT(j,layer-1);
	/* NB this gradient omits all nu dependence */
}

void inc_hessian(weight,hessian,dweight,class_gradient)
	double	*weight,**hessian,*dweight,class_gradient;
{

  int	i,j,layer;	
  double factor;

  factor = 1.0/SIGMA_NU;
  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++)
    for(i=1;i<=NUMBERIN_(layer);i++)
      for(j=0;j<=NUMBERIN_(layer-1);j++)
	dweight[wn[i][j][layer]]=DELTA(i,layer)*XACT(j,layer-1)*factor;
  /* the s_nu gets squared later */
  
  for(i=1;i<=WEIGHTNUMBER;i++){
    for(j=i;j<=WEIGHTNUMBER;j++){ 
      /* NB, just do half of the matrix -- then sym_hessian */
      if(CLASSIFIER)hessian[i][j]+=dweight[i]*dweight[j]*class_gradient;
      else hessian[i][j]+=dweight[i]*dweight[j]; 
    }
  }
}

void sym_hessian(hessian)
	double	**hessian;
{

	int	i,j;	

	for(i=1;i<=WEIGHTNUMBER;i++){
		for(j=i+1;j<=WEIGHTNUMBER;j++){
			hessian[j][i]=hessian[i][j];
		}
	}
}

void inc_gradient_penalty(weight,dweight)
     double	*weight,*dweight;
{
  int	i,j,layer,num;	 
  
  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++){
    for(i=1;i<=NUMBERIN_(layer);i++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	num = wn[i][j][layer];
	dweight[num]+=weight[num]/V_VAR_W(REGCLASS[num]);
      }
    }
  }
}

void set_up_regclass() /* REGRULE 5 added      28 5 94 */
{
  int	i,j,k,layer,check=0;	

  switch(REGRULE){
  case(2): REGS=3; break;
  case(3): REGS=3+NUMBERIN_(INPUTLAYER)-1; break;
  case(4): REGS=1+NUMBERIN_(INPUTLAYER)+NUMBERIN_(OUTPUTLAYER); break;
  case(5): 
    REGS = 1 + NUMBERIN_(INPUTLAYER) / IN_GROUP_SIZE + NUMBERIN_(OUTPUTLAYER);
    if ( NUMBERIN_(INPUTLAYER) % IN_GROUP_SIZE  > 0 ) REGS ++ ;
    break;
  case(1):
  default: REGS=1;
  }
/* in backprop2.h, 	REGMAX = REGS ;  */
  REGCLASS=ivector(0,WEIGHTNUMBER);
  V_GAMMA1=dvector(1,REGMAX);
  V_GAMMA3=dvector(1,REGMAX);
  V_LOGS_W=dvector(1,REGMAX);
  V_DLA=dvector(1,REGMAX);
  V_W_ENERGY=dvector(1,REGMAX);
  V_BF_LOGS_W=dvector(1,REGMAX);
  V_WN=ivector(1,REGMAX);
  for(i=1;i<=REGMAX;i++)V_WN[i]=0;
  switch(REGRULE){
  case(2):	/* input weights, biases, output weights+biases */
    layer=HIDDENLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++){
      j=0;
      REGCLASS[wn[i][j][layer]]=2;
      V_WN[2]++;
      for(j=1;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=1;
	V_WN[1]++;
      }
    }
    layer=OUTPUTLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=3; 
	V_WN[3]++;
      }
    }
    break;
  case(3):	/* each input's weights; hiddens' biases; 
		   output weights+biases */
    /* This prior allows pruning of irrelevant inputs
       This was added 20 11 92 at Radford's instigation */
    layer=HIDDENLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++){
      j=0;
      REGCLASS[wn[i][j][layer]]=REGS-1;
      V_WN[REGS-1]++;
      for(j=1;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=j;
	V_WN[j]++;
      }
    }
    layer=OUTPUTLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=REGS;
	V_WN[REGS]++;
      }
    }
    break;
  case(4):	/* each input's weights; hiddens' biases; 
		   each output's weights+biases */
    /* This prior allows pruning of irrelevant inputs
       This was added 20 11 92 at Radford's instigation */
    layer=HIDDENLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++){
      j=0;
      REGCLASS[wn[i][j][layer]]=REGS-NUMBERIN_(OUTPUTLAYER);
      V_WN[REGS-NUMBERIN_(OUTPUTLAYER)]++;
      for(j=1;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=j;
	V_WN[j]++;
      }
    }
    layer=OUTPUTLAYER;
    for(i=1,k=2+NUMBERIN_(INPUTLAYER);i<=NUMBERIN_(layer);i++,k++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=k;
	V_WN[k]++;
      }
    }
    break;
  case(5):	/* As (4) except that inputs are grouped in 
		   groups of IN_GROUP_SIZE   */

    layer=HIDDENLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++){
      j=0; /* biases go in last class before outputs */
      k=REGS-NUMBERIN_(OUTPUTLAYER);
      REGCLASS[wn[i][j][layer]]=k;
      V_WN[k]++;
      for( j = 1 , k = 1 ; j <= NUMBERIN_(layer-1) ; j ++ ) {
	REGCLASS[wn[i][j][layer]] = k ;
	V_WN[k] ++ ;
	if ( !( j%IN_GROUP_SIZE ) ) k ++ ;
      }
    }
    k=REGS-NUMBERIN_(OUTPUTLAYER)+1;
    layer=OUTPUTLAYER;
    for(i=1;i<=NUMBERIN_(layer);i++,k++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	REGCLASS[wn[i][j][layer]]=k;
	V_WN[k]++;
      }
    }
    break;
  case(1):
  default:
    REGS=1;
    for(i=1;i<=WEIGHTNUMBER;i++){ REGCLASS[i]=1;V_WN[1]++;}
    break;
  }
  
  printf("Number of regularisers: %d\nNumber in each class:\n",REGS);
  for(i=1;i<=REGS;i++){
    printf("%d	",V_WN[i]);
    check+=V_WN[i];
  }
  printf("\n");
  if(check!=WEIGHTNUMBER){
    printf("check = %d, WEIGHTNUMBER = %d \n",check,WEIGHTNUMBER);
    crash("check!=WEIGHTNUMBER");
  }
  GAMMA1= GAMMA2= set_to_k_gammas(V_GAMMA1); 
  GAMMA3= GAMMA4= set_to_k_gammas(V_GAMMA3);
}

void set_up_wn()
{

  int	i,j,layer,imax,jmax;	

  imax = MAX(MAX(HIDDENN,OUTPUTN),INPUTN);
  wn=imatrix3(1,imax,0,imax,HIDDENLAYER,OUTPUTLAYER);

/* include switch () here, or read in from file */
/* write a routine for the saliency end of things such that can write 
to file a vector under the control of another vector which specifies 
existence. That way, can forget about all that shuffling mess */
/*  printf ( "Weight numbers \n" ) ; */
  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++){
    for(i=1;i<=NUMBERIN_(layer);i++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	wn[i][j][layer]=(i-1)*(NUMBERIN_(layer-1)+1)+j+W_OFFSET(layer);
/*	printf ( "%d ", wn[i][j][layer] ) ; */
      }
    }
  }
/*  printf ( "\n" ) ; */
}

void weight_energies(weight)	/* used to be weight_energy */
     double	*weight;
{
  int	i,j,layer,num;	

  for(i=1;i<=REGS;i++)
    V_W_ENERGY[i]=0.0;
  W_X2 =0.0;
  for(layer=HIDDENLAYER;layer<=OUTPUTLAYER;layer++){ 
    for(i=1;i<=NUMBERIN_(layer);i++){
      for(j=0;j<=NUMBERIN_(layer-1);j++){
	num = wn[i][j][layer];
	V_W_ENERGY[REGCLASS[num]]+=weight[num]*weight[num];
      }
    }
  }
  for(i=1;i<=REGS;i++){
    W_X2 += V_W_ENERGY[i] * V_ALPHA(i);
    V_W_ENERGY[i] *= 0.5;
  }
}

/*	GAMMA1 = (double)(WEIGHTNUMBER) - trace(hessin, FREENUMBER) / VAR_W; */
double 	find_gammas(v_gamma,hessin)
	double	**hessin,*v_gamma;
{
	double	d=0.0;
        int i,j,class;  

	for(i=1;i<=REGS;i++)v_gamma[i]=0.0;

        for(j=1;j<=WEIGHTNUMBER;j++) {
		v_gamma[REGCLASS[j]] -= hessin[j][j];
		if(VERBOSE==2)printf("%d",REGCLASS[j]);
	}
	if(VERBOSE==2)printf("\n");

	for(i=1;i<=REGS;i++){
		v_gamma[i]/=V_VAR_W(i);
		v_gamma[i]+=(double)(V_WN[i]);
		d+=v_gamma[i];
	}
        return  d;
}

double 	set_to_k_gammas(v_gamma)
	double	*v_gamma;
{
        int i ; 
	double sum;

	for(i=1;i<=REGS;i++){
		sum+= ( v_gamma[i]  =  (double)(V_WN[i]) );
	}
	return sum; 
}

int	unit_not_used(weight,layer,j)
	double	*weight;
	int 	j,layer;
{

	double	sum=0.0,count=0.0;
	int	i,k;

	for(i=1;i<=NUMBERIN_(layer+1);i++){
		sum+=WEIGHT(i,j,layer+1)*WEIGHT(i,j,layer+1);
		count+=1.0;
	}
	for(k=0;k<=NUMBERIN_(layer-1);k++){
		sum+=WEIGHT(j,k,layer)*WEIGHT(j,k,layer);
		count+=1.0;
	}
	return((sum<MIN_SQ_WEIGHT*count) ? 1:0);	
}
int	units_same(weight,layer,j,i)
	double	*weight;
	int 	i,j,layer;
{

	double	sum=0.0,count=0.0,d;
	int	k;

	for(k=0;k<=NUMBERIN_(layer-1);k++){
		d = fabs(WEIGHT(i,k,layer))-fabs(WEIGHT(j,k,layer));
		sum+=	d*d/V_VAR_W(REGCLASS[wn[i][k][layer]]);
		count+=1.0;
	}
	return((sum<MIN_SQ_WEIGHT*count) ? 1:0);	
}

int	count_unused(weight,verbose)
	double	*weight;
{
	int	j,count=0,temp;

	for(j=1;j<=NUMBERIN_(HIDDENLAYER);j++){
		count+=(temp=unit_not_used(weight,HIDDENLAYER,j));
		if(verbose&&temp){ printf("%d:",j); NU_NUM=j;}
	}
	if(verbose&&count)printf("%d Units not used\n",count);
	NOTUSED = count;
	return	count;
}

int	count_symmetries(weight,verbose)
	double	*weight;
{

	int	i,j,count=0,temp,answer=1;

	for(j=2;j<=NUMBERIN_(HIDDENLAYER);j++){
		count=0;
		for(i=1;i<j;i++){
			count+=(temp=units_same(weight,HIDDENLAYER,j,i));
			if(verbose&&temp){ 
				printf("%d%d:",i,j); SYMM_N1 = i; SYMM_N2 = j;
			}
		}
		answer*=(1+count);
	}
	if(verbose&&(answer>1))printf(" - %d Symmetries detected\n",answer);
	SYMM = answer; 
	return	answer;
}

void 	ev_pfs(weight)
	double	*weight; 
{
	int	i;


	if(!CLASSIFIER)LPF_NU=(DNDF)*(log(STPI)+LOGS_NU);
	else LPF_NU=0.0;
	LPF_W=0.0;
	for(i=1;i<=REGS;i++){
		LPF_W+=(double)(V_WN[i])*(log(STPI)+V_LOGS_W[i]);
	}
}

void	set_up_params1()
{

	NUB4I = 0; /* to allow for bias activity  */
	NUB4H = NUB4I + 1 + INPUTN;  
	NUB4O = NUB4H + HIDDENN; /* no need for a bias activity at the output */ 
	NUAFTER = NUB4O + OUTPUTN;
/* sloppiness here doesn't matter. It only matters that the weight vector is tightly packed */ 

	NWB4H	= 1;
	NWB4O	= HIDDENN*(INPUTN+1) + NWB4H;

	WEIGHTNUMBER = OUTPUTN*(HIDDENN+1) + HIDDENN*(INPUTN+1);
	TOTNUMBER = WEIGHTNUMBER + 2;
	FREENUMBER = WEIGHTNUMBER;

	LEV = ivector ( 1, LEVNUM ) ;
	set_up_wn();
	set_up_regclass();
}

void	set_up_params2()
{

	int 	k;

	PARAM = dvector(1,TOTNUMBER);
	DPARAM = dvector(1,FREENUMBER);
	setdvectortoconst(DPARAM+1,FREENUMBER,0.0);
	AVECTOR = dvector(NUB4H,NUAFTER);
	XVECTOR = dvector(0,NUAFTER);
	AXVECTOR = dvector(0,NUAFTER);
		setdvectortoconst(AXVECTOR,NUAFTER+1,0.0);
	DVECTOR = dvector(NUB4H,NUAFTER);
	EVECTOR = dvector(0,NUAFTER);
	if(!DATA_FROM_IM){
	  inputs = dmatrix(1,NUMBER_DATA,1,INPUTN);
	}
	targets = dmatrix(1,NUMBER_DATA,1,OUTPUTN);
	ERRORBARS = dvector(1,OUTPUTN);
	ERRORBARS2 = dvector(1,OUTPUTN);

/* initialise `bias unit' activities to 1.0 */
	for(k=INPUTLAYER;k<=HIDDENLAYER;k++) 
		XACT(0,k)=1.0;
}

void	read_spec(specfile,wseed)
	char	specfile[];
	long	*wseed;
{ 
	char	junk[150];
	FILE    *fp;

	int	i,num_regs;

	if(VERBOSE) printf( "Reading in specs from %s \n", specfile );
	fp = fopen( specfile, "r" );
	if( !fp )   fprintf( stderr, "No such file: %s\n", specfile ), exit(0);
	JUNKSTRING;	
	JUNKSTRING;	
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &INPUTN );
	JUNKSTRING; 	if(!HIDDENN) fscanf( fp, "%d", &HIDDENN ); 
			else JUNKSTRING;
			fscanf( fp, "%d", &ACT_FUN_H );
	JUNKSTRING; fscanf( fp, "%d", &OUTPUTN ); fscanf( fp, "%d", &ACT_FUN_O );
	if(ACT_FUN_O==10)CLASSIFIER=1;
	else	CLASSIFIER=0;
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &WEIGHTS_FROM_FILE );
	JUNKSTRING; fscanf( fp, "%s", WEIGHTFILE );
	JUNKSTRING; fscanf( fp, "%ld", wseed );
	JUNKSTRING; fscanf( fp, "%lf", &INIT_SIZE );
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &CHEAPO );
	JUNKSTRING; fscanf( fp, "%d", &DATA_FROM_IM );
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%s", INPUTFILE ); fscanf( fp, "%s", TARGETFILE );
	JUNKSTRING; fscanf( fp, "%d", &NUMBER_DATA );
	JUNKSTRING; fscanf( fp, "%d", &LAST_TRAIN );
	JUNKSTRING; fscanf( fp, "%d", &FIRST_TEST );
			fscanf( fp, "%d", &LAST_TEST );
		TEST_NUMBER=1+LAST_TEST-FIRST_TEST;
	JUNKSTRING; fscanf( fp, "%d", &FIRST_TEST2 );
			fscanf( fp, "%d", &LAST_TEST2 );
		TEST_NUMBER2=1+LAST_TEST2-FIRST_TEST2;
	JUNKSTRING;
	TRAIN_PATCH=ivector(1,IMMAX);
	TEST1_PATCH=ivector(1,IMMAX);
	TEST2_PATCH=ivector(1,IMMAX);
#define READIM(x) fscanf( fp, "%d %d %d %d %d", &x[1], &x[2], &x[3], &x[4], &x[5] )
	JUNKSTRING;JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &OX ); fscanf( fp, "%d", &OY );
	if( DATA_FROM_IM ) {
	  if( INPUTN != OX * OY ) {
	    printf("Warning: INPUTN %d != OX * OY " , INPUTN ) ;
	    INPUTN = OX * OY ;
	    printf("%d -- INPUTN reset\n" , INPUTN ) ;
	  }
	  if( OUTPUTN != 1 ) {
	    printf("Warning: OUTPUTN != 1 ; resetting to 1 \n" ) ;
	    OUTPUTN = 1;
	  }
	}
	JUNKSTRING; fscanf( fp, "%lf", &IMSCALE ); fscanf( fp, "%lf", &IMSUBTRACT );
	JUNKSTRING; fscanf( fp, "%d", &IMBINOUT ); fscanf( fp, "%d", &IMCLIP );
	JUNKSTRING; fscanf( fp, "%s", junk ); 
	            sprintf(TRAIN_IM_I, "%s.sdt",junk  );
	            sprintf(TRAIN_SPR, "%s.spr",junk  );
	            fscanf( fp, "%s", junk ) ; 
	            sprintf(TRAIN_IM_T, "%s.sdt",junk  ); 
	JUNKSTRING; READIM(TRAIN_PATCH);
	JUNKSTRING; fscanf( fp, "%s", junk ); 
	            sprintf(TEST1_IM_I, "%s.sdt",junk  );
	            sprintf(TEST1_SPR, "%s.spr",junk  );
	            fscanf( fp, "%s", junk ) ; 
	            sprintf(TEST1_IM_T, "%s.sdt",junk  ); 
	JUNKSTRING; READIM(TEST1_PATCH);
	JUNKSTRING; fscanf( fp, "%s", junk ); 
	            sprintf(TEST2_IM_I, "%s.sdt",junk  );
	            sprintf(TEST2_SPR, "%s.spr",junk  );
	            fscanf( fp, "%s", junk ) ; 
	            sprintf(TEST2_IM_T, "%s.sdt",junk  ); 
	JUNKSTRING; READIM(TEST2_PATCH);
#undef  READIM
	if( DATA_FROM_IM ) {
	  TRAIN_NUMBER = 
	    set_up_image( TRAIN_SPR , TRAIN_IM_I , &TRAIN_IMAGE , TRAIN_PATCH ) ;
	  TEST_NUMBER = 
	    set_up_image( TEST1_SPR , TEST1_IM_I , &TEST1_IMAGE , TEST1_PATCH ) ; 
	  TEST_NUMBER2 = 
	    set_up_image( TEST2_SPR , TEST2_IM_I , &TEST2_IMAGE , TEST2_PATCH ) ; 
	  NUMBER_DATA = TRAIN_NUMBER + TEST_NUMBER + TEST_NUMBER2 ;
	  FIRST_TEST = TRAIN_NUMBER + 1 ;
	  LAST_TEST = TRAIN_NUMBER + TEST_NUMBER ;
	  FIRST_TEST2 = LAST_TEST + 1 ;
	  LAST_TEST2 = NUMBER_DATA ; 
	  TRAIN_PATCH[IMEXOFFSET] = 1;
	  TEST1_PATCH[IMEXOFFSET] = FIRST_TEST;
	  TEST2_PATCH[IMEXOFFSET] = FIRST_TEST2;
	}
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &AUTO );
	JUNKSTRING; fscanf( fp, "%d", &PRINT_PERIOD );
	JUNKSTRING; fscanf( fp, "%d", &REPORT );
		fscanf( fp, "%s", REPORT_FILE );
	JUNKSTRING; fscanf( fp, "%d", &RFINAL );
		fscanf( fp, "%s", RFINAL_FILE );
	JUNKSTRING; fscanf( fp, "%d", &LOOPS );
	JUNKSTRING; fscanf( fp, "%d", &ITERATIONS );
	JUNKSTRING; fscanf( fp, "%d", &WEAK_ON_FINAL );
	JUNKSTRING; fscanf( fp, "%d", &REINIT_WEIGHTS );
	JUNKSTRING;
 	JUNKSTRING; fscanf( fp, "%d", &WPENALTY );
	JUNKSTRING; fscanf( fp, "%d", &OP_S_W );
	JUNKSTRING; fscanf( fp, "%d", &REGRULE );
	GAMMA_FUDGE = 1.0 ;  /* while we are hacking things ... */
	IN_GROUP_SIZE = 21 ; /* This hack is for input units that represent 
				amino acids (regrule 5) */
	set_up_params1(); /* This includes setting up the reg classes */
	set_up_params2();
	JUNKSTRING; fscanf( fp, "%d", &OP_S_NU );
/*	JUNKSTRING; fscanf( fp, "%lf", &ETA );
		ETA*=(double)(NDF);	*/
	JUNKSTRING; fscanf( fp, "%lf", &DTEMP2 );
	if(CLASSIFIER){
		LOGS_NU = 0.0 ; OP_S_NU = 0 ; INV_VAR_NU = 1.0;
	      } else {
		LOGS_NU = safelog( DTEMP2 ) ; INV_VAR_NU = 1.0/VAR_NU;
	      }
	BETA_MIN = BETA; /* The specified noise level is used 
			    as a maximum noise level to bound 
			    noise-optimisation */
	JUNKSTRING; 
	for(i=1;i<=REGS;i++){
		fscanf( fp, "%lf", &DTEMP1 );
		V_LOGS_W[i]=log(DTEMP1);
	}	/* This may be overridden by bigback.c line 91 */
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%lf", &PRIOR_ALPHA ); fscanf( fp, "%lf", &PRIOR_BETA );
	JUNKSTRING; fscanf( fp, "%lf", &DELTA_SW_SCAN );
	JUNKSTRING; fscanf( fp, "%d", &ALT1 );
		fscanf( fp, "%d", &ALT2 );
	JUNKSTRING; fscanf( fp, "%d", &WRITEEIGS ); 
		fscanf( fp, "%s", EIGENVALUES );
	JUNKSTRING; fscanf( fp, "%d", &GAMMA_SOURCE );
		fscanf( fp, "%d", &OCCAM_SOURCE );
	JUNKSTRING; fscanf( fp, "%d", &OCCAM2_SOURCE );
	JUNKSTRING; fscanf( fp, "%d", &ENABLE_PERMUTATION_FIDDLE );
	JUNKSTRING; fscanf( fp, "%d", &PHI_NUMBER );
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &STORE_W );
	JUNKSTRING; fscanf( fp, "%s", WEIGHTSOUT );
	JUNKSTRING; fscanf( fp, "%d", &STORE_H1 );
	JUNKSTRING; fscanf( fp, "%s", HESSINOUT1 );
	 fscanf( fp, "%s", LUHESSINOUT1 );
	JUNKSTRING; fscanf( fp, "%d", &STORE_H2 );
	JUNKSTRING; fscanf( fp, "%s", HESSINOUT2 );
	 fscanf( fp, "%s", LUHESSINOUT2 );
	JUNKSTRING; fscanf( fp, "%d", &STORE_DH );
	JUNKSTRING; fscanf( fp, "%s", DHESSINOUT );
/*	JUNKSTRING; fscanf( fp, "%d", &STORE_RW );
	JUNKSTRING; fscanf( fp, "%s", RWFILE );         */
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &VM );
	if(CHEAPO>1 && ( VM==1 || VM==3) ) VM = 2 ;
	JUNKSTRING; fscanf( fp, "%d", &REINIT );
	JUNKSTRING; fscanf( fp, "%lf", &FRAC_TOL );
	JUNKSTRING; fscanf( fp, "%lf", &FRAC_LINMIN_TOL );
	JUNKSTRING; fscanf( fp, "%lf", &FT_MULTIPLE );
	JUNKSTRING; fscanf( fp, "%lf", &FT_MIN );
	JUNKSTRING; fscanf( fp, "%lf", &FLT_MIN );
	JUNKSTRING; fscanf( fp, "%lf", &WK_FT );
	JUNKSTRING; fscanf( fp, "%lf", &WK_FLT );
	JUNKSTRING; fscanf( fp, "%lf", &MIN_SQ_WEIGHT );
	JUNKSTRING; for(i=1;i<=LEVNUM;i++){
	  fscanf( fp, "%d", &LEV[i] );
	  if(LEV[i] < 1 ) { 
	    fprintf(stderr, "Warning: LEV[%d] = %d < 1  - corrected\n", i, LEV[i]);
	    LEV[i] = 1;
	  }
	}      
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%lf", &KAPPA );
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &GEN_ADD_N );
	JUNKSTRING; fscanf( fp, "%d", &GEN_I_TOO );
	JUNKSTRING; fscanf( fp, "%d", &GEN_WRITE1 );
	fscanf( fp, "%s", OUTPUTFILE );
	JUNKSTRING; fscanf( fp, "%d", &GEN_WRITE2 );
	fscanf( fp, "%s", IOFILE );
	JUNKSTRING; fscanf( fp, "%d", &GEN_WRITE_RES );
	fscanf( fp, "%s", RESFILE );
	JUNKSTRING; fscanf( fp, "%d", &GEN_SENS );
	fscanf( fp, "%s", OTHERFILE );
	JUNKSTRING; fscanf( fp, "%d", &GEN_BARS );	
	JUNKSTRING;
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%s", HESSININ );fscanf( fp, "%s", LUHESSININ );
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &CLASSEBSTYLE );
	JUNKSTRING; fscanf( fp, "%d", &GEN_WRITE_MIO );
		fscanf( fp, "%s", MIOFILE );
	JUNKSTRING; fscanf( fp, "%s", ERRORBARFILE );
		fscanf( fp, "%d", &GEN_EB1OR2 );
		fscanf( fp, "%lf", &EBFACTOR );
	JUNKSTRING; fscanf( fp, "%d", &GEN_WRITE_MMIG );
		fscanf( fp, "%s", MMIGFILE );
		fscanf( fp, "%lf", &MMIGFACTOR );
	JUNKSTRING; fscanf( fp, "%s", RHOFILE );
	JUNKSTRING; fscanf( fp, "%d", &CH_EFF_NUMDAT );
	JUNKSTRING;
	JUNKSTRING; fscanf( fp, "%d", &VERBOSE );
	JUNKSTRING; 
	JUNKSTRING; 
	fscanf( fp, "%d", &PHN );
	fscanf( fp, "%d", &PSW );
	fscanf( fp, "%d", &PBFSW );
	fscanf( fp, "%d", &PEBSW );
	fscanf( fp, "%d", &PSN );
	fscanf( fp, "%d", &PBFSN );
	fscanf( fp, "%d", &PFT );
	fscanf( fp, "%d", &POBF );
	fscanf( fp, "%d", &PDE );
	fscanf( fp, "%d", &PWE );
	fscanf( fp, "%d", &PDX );
	fscanf( fp, "%d", &PNGD );
	fscanf( fp, "%d", &PWXS );
	fscanf( fp, "%d", &PWX );
	fscanf( fp, "%d", &PGS );
	fscanf( fp, "%d", &PG1 );
	fscanf( fp, "%d", &PG2 );
	fscanf( fp, "%d", &PG3 );
	fscanf( fp, "%d", &PG4 );
	fscanf( fp, "%d", &PLN );
	fscanf( fp, "%d", &PLW );
	fscanf( fp, "%d", &PLO1 );
	fscanf( fp, "%d", &PLO2 );
	fscanf( fp, "%d", &PLO3 );
	fscanf( fp, "%d", &PLO4 );
	fscanf( fp, "%d", &PLOF );
	JUNKSTRING;
	fscanf( fp, "%d", &PLP );
	fscanf( fp, "%d", &PSY );
	fscanf( fp, "%d", &PNU );
	fscanf( fp, "%d", &PTE );
	fscanf( fp, "%d", &PMTX );
	fscanf( fp, "%d", &PMTXB );
	fscanf( fp, "%d", &PTX );
	fscanf( fp, "%d", &PTL );
	fscanf( fp, "%d", &PTE2 );
	fscanf( fp, "%d", &PMTX2 );
	fscanf( fp, "%d", &PMTX2B );
	fscanf( fp, "%d", &PTX2 );
	fscanf( fp, "%d", &PTL2 );
	fscanf( fp, "%d", &PEVAB );
	fscanf( fp, "%d", &PEVB );
	fscanf( fp, "%d", &PEV );
	fscanf( fp, "%d", &PEVAB2 );
	fscanf( fp, "%d", &PFG1 );
	fscanf( fp, "%d", &PFG2 );
     if(CLASSIFIER){ PSN=0;PBFSN=0; }
     if(!CLASSIFIER){ GEN_WRITE_MIO=0; }
     if(CHEAPO>1){PGS=0;PG1=0;PG2=0;PG3=0;PG4=0;PLO1=0;PLO2=0;PLO3=0;
		PLO4=0;PLOF=0;PMTX=0;PMTXB=0;PMTX2=0;PMTX2B=0;PEVAB=0;
		PEVB=0;PEV=0;PEVAB2=0;
	      }
	else if(CHEAPO==1){PMTX2=0;PMTX=0;}
     JUNKSTRING;
	JUNKSTRING;fscanf( fp, "%d", &RESCALE );
	JUNKSTRING;

	JUNKSTRING;
	if(VERBOSE) 
	  printf( "Check spec read OK: this should say LAST_STRING: %s\n", junk);
	if(VERBOSE) printf( "Have Read in specifications\n" );
	fclose( fp );
}

double	find_energy(low,step,high,weight)
	int	low,step,high;
	double	*weight;
{

	double	energy;
	int	k;

	energy=0.0;
	for(k=low;k<=high;k+=step){
		load_activities(k);
		full_forward_pass(weight);
		energy+=error_signal(k);
	}
	return	energy;
}

double	find_moderated_energy(low,step,high,weight,dweight,hessin) /* only intended for clasifier use */
     /* actually this prog should have been written using find_error_bars */
     int	low,step,high;
     double	*weight,*dweight,**hessin;
{
  
  double	dtemp,energy;
  int	k,layer,i;
  
  energy = 0.0;
  for(k = low;k<= high;k+= step){
    load_activities(k);
    full_forward_pass(weight);
    /* need to find \bg, find error bars on a, then recompute AACT + XACT */
    layer = OUTPUTLAYER;
    for(i = 1;i<=OUTPUTN;i++)
      DELTA(i,OUTPUTLAYER) = 0.0;
    
    for(i = 1;i<=OUTPUTN;i++){
      ERROR(i,layer) = 1.0;
      DELTA(i,layer) = 1.0;
      full_back_pass(weight);/* slightly inefficient */
      set_gradient(weight,dweight);
      dtemp = quadratic_form(hessin,dweight,1,WEIGHTNUMBER);
      /* use dtemp as s^2(a) to moderate AACT */
      moderate2(&AACT(i,layer),dtemp);
      XACT(i,layer) = act_fun(AF_(layer),AACT(i,layer),&AXACT(i,layer));
      DELTA(i,layer) = 0.0;
    }
    energy+= error_signal(k);
  }
  return	energy;
}

double	lu_find_moderated_energy(low,step,high,weight,dweight,luh,luhindx) /* only intended for clasifier use */
/* actually this prog should have been written using find_error_bars */
	int	low,step,high,*luhindx;
	double	*weight,*dweight,**luh;
{

  double	dtemp,energy;
  int	k,layer,i;

  energy=0.0;
  for(k=low;k<=high;k+=step){
    load_activities(k);
    full_forward_pass(weight);
    /* need to find \bg, find error bars on a, then recompute AACT + XACT */
    layer=OUTPUTLAYER;
    for(i=1;i<=OUTPUTN;i++)
      DELTA(i,OUTPUTLAYER)=0.0;
    
    for(i=1;i<=OUTPUTN;i++){
      ERROR(i,layer)=1.0;
      DELTA(i,layer)=1.0;
      full_back_pass(weight);/* slightly inefficient */
      set_gradient(weight,dweight);
      copydvector(dweight,WEIGHTNUMBER,SCRATCHV);
      dtemp = lumatrixproduct(dweight,luh,SCRATCHV,WEIGHTNUMBER,luhindx);
      /* use dtemp as s^2(a) to moderate AACT */
      moderate2(&AACT(i,layer),dtemp);
      XACT(i,layer)=act_fun(AF_(layer),AACT(i,layer),&AXACT(i,layer));
      DELTA(i,layer)=0.0;
    }
    energy+=error_signal(k);
  }
  return	energy;
}

void	moderate(a,s2,kf,kappa,g,factor)
	double	*a,s2,*kf,*kappa,*g,*factor;
{

	kappa[0]=1.0/sqrt(1.0+MKAPPA*s2);
	g[0]=2.0 * s2 / sqrt( a[0]*a[0] + 4.0 * s2*s2);
	kf[0]=(1.0 - g[0] * (1.0-kappa[0]));
	factor[0]=a[0]*a[0]/( a[0]*a[0] + 4.0 * s2*s2);
	switch(PHI_NUMBER){
	case(2):
		a[0] *= kf[0];
		break;
	default:
	case(1):
		a[0] *= kappa[0];
		break;
	}
}

double	dmoderate(act,x,ax,kf,kappa,g,factor,number)
/* gives the derivative of f(moderate(a,s2)) w r t a */
/* number = 1 gives derivative. number=2 is handy for 
	mmig calcn */
	double	act,x,ax,kappa,kf,g,factor;
	int number;
{
	switch(PHI_NUMBER){
	case(2):
		return( x*ax* dpower((kf - factor * g ),number));
		break;
	default:
	case(1):
		return(dpower( kappa,number)*  x*ax);
		break;
	}
}

/* outdated */
void	moderate2(a,s2)
	double	*a,s2;
{
  if(s2>0.0)
	a[0] *= 1.0/sqrt(1.0+MKAPPA*s2); /* there used to be no sqrt here */
  return;
}

void	report_saliencies(hessin,weight,from,to)
	double	**hessin,*weight;
	int 	from,to;
{

	int	i;
	double	d1,d2=1000000.0;

	from = MAX(from,1);
	to = MIN(to,WEIGHTNUMBER);

	if(VERBOSE) printf("Saliencies: \n");
	for(i=from;i<=to;i++){
		d1= weight[i]*weight[i]/hessin[i][i];
		if(d1<d2) {d2=d1; LEAST_S = i;}
		printf("%5g ",d1);
	}
	newline();
}

void	compensate_weight(hessin,weight,wnumber) /* wnumber is the number of the 
			weight from hidden to output unit */ 
	double	**hessin,*weight;
	int 	wnumber;
{

	int	i;
	double	mag;

	/* First perform the subtraction from w appropriate to deletion 
	of the weight to the [assumed single] output unit */

	mag = weight[wnumber]/hessin[wnumber][wnumber];
	for(i=1;i<=WEIGHTNUMBER;i++){
		weight[i]-= hessin[i][wnumber]*mag;
	}

}


void	remove_hidden(hessin,weight,wnumber) /* wnumber is the number of the 
			weight from hidden to output unit */ 
	double	**hessin,*weight;
	int	wnumber;
{

	int	i,number;
	double	mag;

/* Then delete weights and shuffle them along */
	
	number = wnumber - NWB4O; /* CHECK */
	if((number<1)||(number>HIDDENN)){
		printf("Error, remove_hidden %d %d\n",wnumber,number); 
		return;
	} else if(VERBOSE==2) 
		printf("Deleting hidden %d  \n",number);

	for(i=(number)*(INPUTN+1)+NWB4H;i<=wnumber-1;i++){
		weight[i-(INPUTN+1)]=weight[i];
	}
	for(i=wnumber;i<=WEIGHTNUMBER-1;i++){
		weight[i-(INPUTN+1)]=weight[i+1];
	}
	HIDDENN--;
	set_up_params1(); /* should delete memory for wn etc */
}

double	log_like() /* this is not really the log like if there is
		      more than one output */
{
  double	loglike=0.0,dtemp;
  int	i,layer;	
  
  layer=OUTPUTLAYER;
  for(i=1;i<=NUMBERIN_(layer);i++){
    dtemp = ERROR(i,layer)/ERRORBARS[i];
    loglike+=(-(dtemp*dtemp*0.5)-safelog(STPI*ERRORBARS[i]));
  }
  return	loglike; 
}

/* log_predictive_error */
double	find_log_like( low , step , high , weight , dweight , luh , luhindx )
     /* NOT USED until bigback5 */
	int	low,step,high,*luhindx;
	double	*weight,*dweight,**luh;
{

  double	loglike;
  int	k;
  
  loglike=0.0;
  for(k=low;k<=high;k+=step){
    load_activities(k);
    full_forward_pass(weight);
    lu_find_error_bars ( weight , dweight , luh , luhindx ) ;
    incorporate_sigma_into_error_bars(weight);
    error_signal(k);
    loglike += log_like();
  }
  return	loglike;
}

double	find_gradient_on(low,step,high,weight,dweight)
     int	low,step,high;
     double	*weight,*dweight;
{
  
  int	k;	
  double	energy=0.0;
  
  setdvectortoconst(dweight+1,FREENUMBER,0.0);
  if(CLASSIFIER){ DNGOODDATA=0.0; L_CORRECT=0;}
  for(k=low;k<=high;k+=step){
    load_activities(k);
    full_forward_pass(weight);
    energy+=error_signal(k);
    if(CLASSIFIER)DNGOODDATA+=sum_curv_outputs();
    full_back_pass(weight); 
    full_inc_gradient(weight,dweight); /* used to divide by var_nu in here */
  }
  if(!CLASSIFIER)
    multiplydvector(dweight+1,FREENUMBER,(1.0/VAR_NU));
  if(CLASSIFIER){ D_CORRECT=L_CORRECT; }
  if(WPENALTY) inc_gradient_penalty(weight,dweight);
  return	energy;
}

double	objective_function(weight) /* for dfpmin */
	double	*weight; 
{
  if(DATA_FROM_IM)active_image(1);	

  D_ENERGY = find_energy(FIRST_TRAIN,LEAVEN,LAST_TRAIN,weight);
  weight_energies(weight); /* this function returns a double if wanted */ 

  if(VERBOSE==2)printf("%-7.5g	%-g	%-g	\n",0.5*(T_X2),D_X2,W_X2);
  return	(OB_F=0.5*(T_X2));
}

void	d_objective_function(weight,dweight) /* for dfpmin */
	double	*weight,*dweight; 
{

  if(DATA_FROM_IM)active_image(1);	
	D_ENERGY = find_gradient_on(FIRST_TRAIN,LEAVEN,LAST_TRAIN,weight,dweight);
	if(VERBOSE==2)print_weights(dweight,4);
}

double	logpermutations(weight)
	double	*weight;
{

	int	temp,temp2;

	/* do I want to omit unused units? */

	/* there are two possible fixes: one is to prevent their being 
a permutation contribution from 2 or more units since they are degenerate */ 
/* An alternative is to use the knowledge that the occam factor ought to 
be penalising them and isn't and try to make up for that */ 
/* I think it is best to just do the former and achieve the latter by 
deletion */ 

	temp = count_unused(weight,1);
	temp2 = count_symmetries(weight,1);
	if(ENABLE_PERMUTATION_FIDDLE){
		return( logfactorial(HIDDENN) + (double)(HIDDENN - temp)*log(2.0) - logfactorial(temp) - log((double)(temp2)) ); 
	}
	else return( logfactorial(HIDDENN) + (double)(HIDDENN)*log(2.0) );

}

void	final_line(fp,weight,leftjustify) 
	int	leftjustify;
	double	*weight; 
	FILE	*fp;
{

  int	i;


#define	PRINTTEXT						\
  if(PHN)  fprintf(fp,S2D,HIDDENN);			\
  if(PSW)  for(i=1;i<=REGS;i++)fprintf(fp,S84G,V_SIGMA_W(i));	\
  for(i=1;i<=REGS;i++){					\
     if(PBFSW)fprintf(fp,S74G,exp(V_BF_LOGS_W[i]));	\
     if(PEBSW)fprintf(fp,S43G,V_DLA[i]);	}	\
  if(PSN)  fprintf(fp,S85G,SIGMA_NU);			\
  if(PBFSN)fprintf(fp,S63G,1.0/sqrt(BF_BETA));		\
  if(PFT)  fprintf(fp,S73G,FRAC_TOL);			\
  if(POBF) fprintf(fp,S74G,OB_F);			\
  if(PDE)  fprintf(fp,S74G,D_ENERGY);			\
  if(PWE)  for(i=1;i<=REGS;i++)fprintf(fp,S54G,V_W_ENERGY[i]);	\
  if(PDX){ \
	     if(!CLASSIFIER)fprintf(fp,S54G3D,D_X2,NDF);	       	\
	     else          fprintf(fp,S3D3D,D_CORRECT,NDF);	       	\
	 }  \
  if(PNGD) fprintf(fp,S54G,DNGOODDATA);				\
  if(PWXS) for(i=1;i<=REGS;i++)fprintf(fp,S64G,V_W_X2(i) * 100.0 /(double)(V_WN[i]) ); \
  if(PWX)  fprintf(fp,S64G3D,W_X2,WEIGHTNUMBER);	\
  if(PGS)  for(i=1;i<=REGS;i++)fprintf(fp,S64G,V_GAMMA[i]);	     \
  if(PG1)  fprintf(fp,S64G,GAMMA1);			\
  if(PG2)  fprintf(fp,S64G,GAMMA2);			\
  if(PG3)  fprintf(fp,S64G,GAMMA3);			\
  if(PG4)  fprintf(fp,S64G,GAMMA4);			\
  if(PLN)  fprintf(fp,S53G,LPF_NU);			\
  if(PLW)  fprintf(fp,S53G,LPF_W);			\
  if(PLO1) fprintf(fp,S74G,LOGOCCAM1);		\
  if(PLO2) fprintf(fp,S74G,LOGOCCAM2);		\
  if(PLO3) fprintf(fp,S74G,LOGOCCAM3);		\
  if(PLO4) fprintf(fp,S74G,LOGOCCAM4);		\
  if(PLOF) fprintf(fp,S74G,LOGOCCAMFACTOR);		\
  if(PLP)  fprintf(fp,S43G,LOGPERMUTATIONS);		\
  if(PSY) {if(SYMM>9999)fprintf(fp,S43G,log((double)(SYMM))); \
           else fprintf(fp,S4D,SYMM);}			        \
  if(PNU)  fprintf(fp,S2D,NOTUSED);			\
  if(PTE)  fprintf(fp,S85G,T_ENERGY);			\
  if(PMTX) fprintf(fp,S85G,TEST_X2_MOD);			\
  if(PMTXB)fprintf(fp,S85G,TEST_X2_MODB);			\
  if(PTX){ \
	     if(!CLASSIFIER)  fprintf(fp,S64G4D,TEST_X2,TNDF);	    \
	     else          fprintf(fp,S4D4D,T_CORRECT,TNDF); \
	 } \
  if(PTL){ \
	     if(!CLASSIFIER)(fp,S75G,T_LOGLIKE);	\
	 }  \
  if(PTE2) fprintf(fp,S85G,T_ENERGY_2);		\
  if(PMTX2)fprintf(fp,S85G,TEST_X2_MOD_2);		\
  if(PMTX2B)fprintf(fp,S85G,TEST_X2_MOD_2B);		\
  if(PTX2) { \
	     if(!CLASSIFIER) fprintf(fp,S64G4D,TEST_X2_2,TNDF2);    \
	     else          fprintf(fp,S4D4D,T_CORRECT2,TNDF2); \
	   }\
  if(PTL2) { \
	     if(!CLASSIFIER)(fp,S75G,T_LOGLIKE_2);	\
	 }  \
  if(PEVAB)fprintf(fp,S75G,EVAB);			\
  if(PEVB) fprintf(fp,S75G,EVAB+LOG_AV);		\
  if(PEV)  fprintf(fp,S75G,EVAB+LOG_ABV);		\
  if(PEVAB2)fprintf(fp,S75G,EVAB2);			\
  if(PFG1) fprintf(fp,S75G,FUDGE1);			\
  if(PFG2) fprintf(fp,S75G,FUDGE2);			\
  fnewline;

	if(leftjustify){ 
#define	S2D	"%-2d "
#define	S4D	"%-4d "
#define	S75G	"%-7.5g "
#define	S73G	"%-7.3g "
#define	S75G43G	"%-7.5g %-4.3g "
#define	S85G	"%-8.5g "
#define	S84G	"%-8.4g "
#define	S63G	"%-6.3g "
#define	S74G	"%-7.4g "
#define	S64G	"%-6.4g "
#define	S54G	"%-5.4g "
#define	S76G	"%-7.6g "
#define	S3D3D	"%-3d %-3d "
#define	S4D4D	"%-4d %-4d "
#define	S54G3D	"%-5.4g %-3d "
#define	S64G4D	"%-6.4g %-3d "
#define	S64G3D	"%-6.4g %-3d "
#define	S64G2D	"%-6.4g %-2d "
#define	S74G3D	"%-7.4g %-3d "
#define	S94G3D	"%-9.4g %-3d "
#define	S54G2D	"%-5.4g %-2d "
#define	S53G	"%-5.3g "
#define	S43G	"%-4.3g "
		PRINTTEXT
#undef	S2D	
#undef	S4D	
#undef	S75G	
#undef	S73G	
#undef	S75G43G	
#undef	S85G	
#undef	S84G	
#undef	S63G	
#undef	S74G	
#undef	S64G	
#undef	S54G	
#undef	S76G	
#undef	S3D3D	
#undef	S4D4D	
#undef	S54G3D	
#undef	S64G2D	
#undef	S64G4D	
#undef	S64G3D	
#undef	S74G3D	
#undef	S94G3D	
#undef	S54G2D	
#undef	S53G	
#undef	S43G	
	}
	else{
#define	S2D	"%2d "
#define	S4D	"%4d "
#define	S75G	"%7.5g "
#define	S73G	"%7.3g "
#define	S75G43G	"%7.5g %4.3g "
#define	S85G	"%8.5g "
#define	S84G	"%8.4g "
#define	S63G	"%6.3g "
#define	S74G	"%7.4g "
#define	S64G	"%6.4g "
#define	S54G	"%5.4g "
#define	S76G	"%7.6g "
#define	S4D4D	"%4d %4d "
#define	S3D3D	"%3d %3d "
#define	S54G3D	"%5.4g %3d "
#define	S64G4D	"%6.4g %3d "
#define	S64G3D	"%6.4g %3d "
#define	S64G2D	"%6.4g %2d "
#define	S54G2D	"%5.4g %2d "
#define	S74G3D	"%7.4g %3d "
#define	S94G3D	"%9.4g %3d "
#define	S53G	"%5.3g "
#define	S43G	"%4.3g "
		PRINTTEXT
	}
}

void	headings(fp) 
	FILE	*fp;
{

	int	i;

	if(PHN)	fprintf(fp,"#H ");
	if(PSW)for(i=1;i<=REGS;i++)	fprintf(fp,"S_W[%2d]  ",i);
	for(i=1;i<=REGS;i++){
		if(PBFSW)	fprintf(fp,"Pr-S_W%d  ",i);
		if(PEBSW)	fprintf(fp,"+/- f ");
	}
	if(PSN)		fprintf(fp,"SIGMA_NU ");
	if(PBFSN)	fprintf(fp,"Pr-S_n ");
	if(PFT)		fprintf(fp,"FR_Tol  ");
	if(POBF)	fprintf(fp,"OB_F    ");
	if(PDE)		fprintf(fp,"D_EN    ");
	if(PWE)for(i=1;i<=REGS;i++)	fprintf(fp,"W_EN%d ",i);
	if(PDX&&!CLASSIFIER)		fprintf(fp,"D_X2 /NDF ");
	if(PDX&&CLASSIFIER)		fprintf(fp,"CORRCT /N ");
	if(PNGD)	fprintf(fp,"NGD   ");
	if(PWXS)for(i=1;i<=REGS;i++)	fprintf(fp,"WX2/k%d ",i);
	if(PWX)		fprintf(fp,"W_X2  /WN  ");
	if(PGS)for(i=1;i<=REGS;i++)	fprintf(fp,"gamma%d ",i);
	if(PG1)		fprintf(fp,"GAMMA1 ");
	if(PG2)		fprintf(fp,"GAMMA2 ");
	if(PG3)		fprintf(fp,"GAMMA3 ");
	if(PG4)		fprintf(fp,"GAMMA4 ");
	if(PLN)		fprintf(fp,"LP_NU ");
	if(PLW)		fprintf(fp,"LPF_W ");
	if(PLO1)	fprintf(fp,"LOGOCC  ");
	if(PLO2)	fprintf(fp,"LOGOC2  ");
	if(PLO3)	fprintf(fp,"LOGOC3  ");
	if(PLO4)	fprintf(fp,"LOGOC4  ");
	if(PLOF)	fprintf(fp,"LOGOCF  ");
	if(PLP)		fprintf(fp,"LPS  ");
	if(PSY)		fprintf(fp,"SYMS ");
	if(PNU)		fprintf(fp,"NU ");
	if(PTE)		fprintf(fp,"T_EN   ");
	if(PMTX)	fprintf(fp,"MT_E      ");
	if(PMTXB)	fprintf(fp,"MT_EB     ");
	if(PTX&&!CLASSIFIER)		fprintf(fp,"T_X2   NDF  ");
	if(PTX&&CLASSIFIER)		fprintf(fp,"T_CRECT/N ");
	if(PTL&&!CLASSIFIER)		fprintf(fp,"T_LOGLK ");
/*	if(PTL&&CLASSIFIER)		*/
	if(PTE2)	fprintf(fp,"T_EN 2   ");
	if(PMTX2)	fprintf(fp,"MT_E  2  ");
	if(PMTX2B)	fprintf(fp,"MT_E2B   ");
	if(PTX2&&!CLASSIFIER)   	fprintf(fp,"T_X2 2 NDF  ");
	if(PTX2&&CLASSIFIER)		fprintf(fp,"T_CRCT2/N ");
	if(PTL2&&!CLASSIFIER)		fprintf(fp,"T_LOGLK ");
/*	if(PTL2&&CLASSIFIER)		*/
	if(PEVAB)	fprintf(fp,"EVAB    ");
	if(PEVB)	fprintf(fp,"EVB     ");
	if(PEV)		fprintf(fp,"EVDENCE ");
	if(PEVAB2)	fprintf(fp,"EVAB2   ");
	if(PFG1)	fprintf(fp,"FUDGE1  ");
	if(PFG2)	fprintf(fp,"FUDGE2  ");
	fnewline;
}
void	objective_report(weight) 
     double	*weight; 
{
  int 	k;
  
  if(CLASSIFIER){L_CORRECT=0;}
  OB_F = objective_function(weight);
  if(CLASSIFIER){ D_CORRECT=L_CORRECT; }

  if(!CLASSIFIER)
    printf("data_X^2	%g	per dof	%g : ",D_X2,D_X_P);
  else 
    printf("cross entropy %g / %d -- number correct %d :", 
	   D_ENERGY,NDF,D_CORRECT );
  weight_energies(weight);
  printf("weight_X^2	%g\n",W_X2);
  count_unused(weight,1);
  if(VERBOSE==3){
    printf("Weights\n");
    print_weights(weight,4);
  }
}

void	lu_ev_tests(weight,luhessin,luhessinindx,luhessin2,luhessinindx2) 
	double	*weight,**luhessin,**luhessin2; 
	int	*luhessinindx,*luhessinindx2;
{ 
  int 	k;
  
  if(DATA_FROM_IM)active_image(2);	
  L_CORRECT = 0;
/* first we find the standard test error */
  T_ENERGY=find_energy(FIRST_TEST,1,LAST_TEST,weight);
  T_CORRECT = L_CORRECT ;
  if ( CLASSIFIER ) {
    if(PMTX) TEST_X2_MOD = 
      lu_find_moderated_energy(FIRST_TEST,1,LAST_TEST,weight,DPARAM,luhessin,luhessinindx);
    if(PMTXB) TEST_X2_MODB = 
      lu_find_moderated_energy(FIRST_TEST,1,LAST_TEST,weight,DPARAM,luhessin2,luhessinindx2);
  } else {
    if(PMTX) TEST_X2_MOD = 
      find_log_like(FIRST_TEST,1,LAST_TEST,weight,DPARAM,luhessin,luhessinindx);
    if(PMTXB) TEST_X2_MODB = 
      find_log_like(FIRST_TEST,1,LAST_TEST,weight,DPARAM,luhessin2,luhessinindx2);
  }
  /* used to have factors of 2.0 in these */
  if(DATA_FROM_IM)active_image(3);
  L_CORRECT = 0;
  T_ENERGY_2=find_energy(FIRST_TEST2,1,LAST_TEST2,weight);
  T_CORRECT2 = L_CORRECT ;
  if ( CLASSIFIER ) {
    if(PMTX2)TEST_X2_MOD_2=lu_find_moderated_energy(FIRST_TEST2,1,LAST_TEST2,weight,DPARAM,luhessin,luhessinindx);
    if(PMTX2B)TEST_X2_MOD_2B=lu_find_moderated_energy(FIRST_TEST2,1,LAST_TEST2,weight,DPARAM,luhessin2,luhessinindx2);
  } else {
    if(PMTX2) TEST_X2_MOD_2 = 
      find_log_like(FIRST_TEST2,1,LAST_TEST2,weight,DPARAM,luhessin,luhessinindx);
    if(PMTX2B) TEST_X2_MOD_2B = 
      find_log_like(FIRST_TEST2,1,LAST_TEST2,weight,DPARAM,luhessin2,luhessinindx2);
  }
}

void	initialise_hessin(weight,hessin)	 
	double	*weight,**hessin;
{
        int     j,k;

        for(j=1;j<=FREENUMBER;j++) {
		hessin[j][j]=VAR_NU/sqrt( (DNDF) );
        	for(k=1;k<j;k++) 
			hessin[j][k]=hessin[k][j]=0.0;
	}
}

void	evaluate_hessian(hessian,weight,low,step,high,dweight)	  /* write me! */
     double	*weight,**hessian,*dweight;
     int low,step,high;
{
  double class_gradient=1.0;
  
  int     i,k,layer;
  
  if ( VERBOSE==2 ) printf("evaluate_hessian\n");
  constantdmatrix(hessian,1,WEIGHTNUMBER,1,WEIGHTNUMBER,0.0);
  for(i=1;i<=OUTPUTN;i++)
    DELTA(i,OUTPUTLAYER)=0.0;
  
  for(k=low;k<=high;k+=step){
    load_activities(k);
    full_forward_pass(weight);
    /*	energy+=error_signal(k); there is no need for errors using this approximation */ 
    layer=OUTPUTLAYER;
    for(i=1;i<=OUTPUTN;i++){
      ERROR(i,layer)=1.0;
      DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer); /* NB this activation function is linear for class (10), so need to send f' to inc_hessian This correction made Sept 91 */
      full_back_pass(weight);/* slightly inefficient */
      if(CLASSIFIER)class_gradient=d_act_fun((int)(3),AACT(i,layer),XACT(i,layer),AXACT(i,layer));
      if(VERBOSE==2) printf
	("a=%5.4g, x=%5.4g, antix=%5.4g, cg=%5.4g\n",
	 AACT(i,layer),XACT(i,layer),AXACT(i,layer),class_gradient);
      inc_hessian(weight,hessian,dweight,class_gradient);
      sym_hessian(hessian);
      DELTA(i,layer)=0.0;
    }
  }
  if(WPENALTY)
    for(i=1;i<=WEIGHTNUMBER;i++)
      hessian[i][i]+=1.0/V_VAR_W(REGCLASS[i]);
  
}

void	find_error_bars(weight,dweight,hessin)	  
	double	*weight,*dweight,**hessin;
{
		
        int     i,layer;
	double	dtemp;

	layer=OUTPUTLAYER;
	for(i=1;i<=OUTPUTN;i++)
		DELTA(i,OUTPUTLAYER)=0.0;

	for(i=1;i<=OUTPUTN;i++){
		ERROR(i,layer)=1.0;
		DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
		full_back_pass(weight);/* slightly inefficient */
		set_gradient(weight,dweight);
		if(VERBOSE==2)	print_weights(dweight,4);
		dtemp = quadratic_form(hessin,dweight,1,WEIGHTNUMBER);
		if(VERBOSE==2)printf("quadratic_form %g \n",dtemp);
		if(dtemp>0.0){
			ERRORBARS[i] = sqrt(dtemp);
			ERRORBARS2[i] = dtemp;
			if(CLASSIFIER&&!CLASSEBSTYLE) ERRORBARS[i] *= d_act_fun(3,AACT(i,layer),XACT(i,layer),AXACT(i,layer)); /* This turns error bars on A into error bars on X, ie the output */
/* actually I don't like this -- the error bars are when big not going to be at all 
symmetrical, so a different calculation should be done to redefine the `mean probability' 
also */
		}
		else{
			printf("Quadratic form not positive, %g\n",dtemp);
			ERRORBARS[i] = 0.0;
			ERRORBARS2[i] = 0.0;
		}
		DELTA(i,layer)=0.0;
	}
}

void	find_and_write_input_sensitivities(fp,weight,newln)	  
     double	*weight;
     FILE       *fp;
     int        newln;
{
		
        int     i,j,layer;
	double	dtemp;

	layer=OUTPUTLAYER;
	for(i=1;i<=OUTPUTN;i++)
		DELTA(i,OUTPUTLAYER)=0.0;

	for(i=1;i<=OUTPUTN;i++){
		ERROR(i,layer)=1.0;
		DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
		full_back_pass(weight);/* slightly inefficient */
		back_to_inputs(weight);
/* sensitivities of outputs to inputs are now lurking in ERROR(i,INPUTLAYER) */
/* write them to fp */
/* ***************************** */
		for(j=1;j<=NUMBERIN_(INPUTLAYER);j++){
		  fprintf(fp,"%9g	",ERROR(j,INPUTLAYER));
		}
		if(j<NUMBERIN_(INPUTLAYER)){ fprintf(fp,"	");}
		DELTA(i,layer)=0.0;
	}
	if(newln)fnewline;
}

void	lu_find_error_bars(weight,dweight,luhessin,luhessinindx)	  
	double	*weight,*dweight,**luhessin;
	int	*luhessinindx;
{
  int     i,layer;
  double	dtemp;

  layer=OUTPUTLAYER;
  for(i=1;i<=OUTPUTN;i++)
    DELTA(i,OUTPUTLAYER)=0.0;
  
  for(i=1;i<=OUTPUTN;i++){
    ERROR(i,layer)=1.0;
    DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
    full_back_pass(weight);/* slightly inefficient */
    set_gradient(weight,dweight);
    if(VERBOSE==2)	print_weights(dweight,4);
    copydvector(dweight,WEIGHTNUMBER,SCRATCHV);
    dtemp = lumatrixproduct(dweight,luhessin,SCRATCHV,WEIGHTNUMBER,luhessinindx);
    if(VERBOSE==2)printf("quadratic_form %g \n",dtemp);
    if(dtemp>0.0){
      ERRORBARS[i] = sqrt(dtemp);
      ERRORBARS2[i] = dtemp;
      if(CLASSIFIER&&!CLASSEBSTYLE) ERRORBARS[i] *= d_act_fun(3,AACT(i,layer),XACT(i,layer),AXACT(i,layer)); /* This turns error bars on A into error bars on X, ie the output -- but see marginalization method instead */
    }
    else{
      printf("warning: Quadratic form not positive, %g\n",dtemp);
      ERRORBARS[i] = 0.0;
      ERRORBARS2[i] = 0.0;
    }
    DELTA(i,layer)=0.0;
  }
}

void	ch_find_error_bars(weight,dweight,mlu,mindx,gradients,b,minb,l,ratio)
	/* CHEAPO version */  
	double	*weight,*dweight,**mlu,**gradients,*b,*minb,ratio;
	int	*mindx,l;
{
		
        int     i,layer,j,k;
	double	dtemp;

	layer=OUTPUTLAYER;
	for(i=1;i<=OUTPUTN;i++)
		DELTA(i,OUTPUTLAYER)=0.0;

	for(i=1;i<=OUTPUTN;i++){
		ERROR(i,layer)=1.0;
		DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
		full_back_pass(weight);/* slightly inefficient */
		set_gradient(weight,dweight);
		if(VERBOSE==2)	print_weights(dweight,4);
		for(j=1;j<=l;j++){
			b[j]=ddot(gradients[j]+1,dweight+1,WEIGHTNUMBER);
		}
		lubksb(mlu,l,mindx,b);
		dtemp=ddot(b+1,b+1,l)*ratio;
		if(dtemp>0.0){
			ERRORBARS[i] = sqrt(dtemp);
			ERRORBARS2[i] = dtemp;
		}
		else{
			printf("Quadratic form not positive, %g\n",dtemp);
			ERRORBARS[i] = 0.0;
			ERRORBARS2[i] = 0.0;
		}
		DELTA(i,layer)=0.0;
	}
}

void	find_error_bars2(weight,hessin,ebmatrix)	
     /* finds the full error bar matrix between output units */ 
	double	*weight,**hessin,**ebmatrix;
{
		
        int     i,j,layer;
	double	dtemp,**gradients;

	layer=OUTPUTLAYER;
	for(i=1;i<=OUTPUTN;i++)
		DELTA(i,OUTPUTLAYER)=0.0;

/*	Have to find  the gradient vector for each output unit and keep them all */
	gradients=dmatrix(1,OUTPUTN,1,WEIGHTNUMBER);
	for(i=1;i<=OUTPUTN;i++){
		ERROR(i,layer)=1.0;
		DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
		full_back_pass(weight);/* slightly inefficient */
		set_gradient(weight,gradients[i]);
		if(VERBOSE==2)	print_weights(gradients[i],4);
		DELTA(i,layer)=0.0;
	}
	for(i=1;i<=OUTPUTN;i++){
		j=i;
		ebmatrix[i][i]=dtemp = quadratic_form(hessin,gradients[i],1,WEIGHTNUMBER);
		if(VERBOSE==2)printf("quadratic_form %g \n",dtemp);
		if(dtemp>0.0){
			ERRORBARS[i] = sqrt(dtemp);
		} 
		else{
			printf("Qf not positive, %g\n",dtemp);
			ERRORBARS[i] = 0.0;
		}
		for(j=i+1;j<=OUTPUTN;j++){
			ebmatrix[i][j]=ebmatrix[j][i]=matrixproduct(gradients[i],hessin,gradients[j],1,WEIGHTNUMBER);
			printf("Executed off-diagonal loop: %g\n",ebmatrix[i][j]);
		}
	}
	free_dmatrix(gradients,1,OUTPUTN,1,WEIGHTNUMBER);
}

void	lu_find_error_bars2(weight,luhessin,ebmatrix,luhessinindx)	  /* finds the 2D error bars between 
		output units */ 
	double	*weight,**luhessin,**ebmatrix;
	int	*luhessinindx;
{
		
        int     i,j,layer;
	double	dtemp,**gradients;

	layer=OUTPUTLAYER;
	for(i=1;i<=OUTPUTN;i++)
		DELTA(i,OUTPUTLAYER)=0.0;

/*	Have to find  the gradient vector for each output unit and keep them all */
	gradients=dmatrix(1,OUTPUTN,1,WEIGHTNUMBER);
	for(i=1;i<=OUTPUTN;i++){
		ERROR(i,layer)=1.0;
		DELTA(i,layer)=d_act_fun(AF_(layer),AACT(i,layer),XACT(i,layer),AXACT(i,layer)) * ERROR(i,layer);
		full_back_pass(weight);/* slightly inefficient */
		set_gradient(weight,gradients[i]);
		if(VERBOSE==2)	print_weights(gradients[i],4);
		DELTA(i,layer)=0.0;
	}
	for(i=1;i<=OUTPUTN;i++){
		j=i;
		copydvector(gradients[i],WEIGHTNUMBER,SCRATCHV);
		ebmatrix[i][i]=dtemp = lumatrixproduct(gradients[i],luhessin,SCRATCHV,WEIGHTNUMBER,luhessinindx);
	/* this creates SCRATCHV = H^-1 gradient_i */
		if(VERBOSE==2)printf("quadratic_form %g \n",dtemp);
		if(dtemp>0.0){
			ERRORBARS[i] = sqrt(dtemp);
		} 
		else{
			printf("Qf not positive, %g\n",dtemp);
			ERRORBARS[i] = 0.0;
		}
		for(j=i+1;j<=OUTPUTN;j++){
			ebmatrix[i][j]=ebmatrix[j][i]=ddot(gradients[j]+1,SCRATCHV+1,WEIGHTNUMBER);
			printf("Executed off-diagonal loop: %g\n",ebmatrix[i][j]);
		}
	}
	free_dmatrix(gradients,1,OUTPUTN,1,WEIGHTNUMBER);
}

void	incorporate_sigma_into_error_bars(weight)	  
     double	*weight;
{
  
  int     i;
  double	dtemp;
  
  for(i=1;i<=OUTPUTN;i++){
    dtemp = (ERRORBARS[i]*ERRORBARS[i]) + VAR_NU;
    if(dtemp>0.0)
      ERRORBARS[i] = sqrt(dtemp);
    else{
      printf("Quadratic form not positive, %g\n",dtemp);
      ERRORBARS[i] = 0.0;
    }
  }
}

void	incorporate_sigma_into_error_bars2(weight,ebmatrix)	  
	double	*weight,**ebmatrix;
{
		
        int     i;
	double	dtemp;

	for(i=1;i<=OUTPUTN;i++){
		ebmatrix[i][i] +=  VAR_NU;
	}
}

static void *malloce(n)
int n;
{
   void *rv;
 
   rv = malloc(n);
   if (rv == NULL)
   {
      fprintf(stderr, "Error allocating %d bytes\n", n);
      exit(4);
   }
   return rv;
}

static void readb(filename, buffer, np)
const char *filename;
unsigned char *buffer;
int np;
{
   FILE *fp;
   int nread;

   fp = fopene(filename, "rb");
   nread = fread(buffer, sizeof(unsigned char), np, fp);
   if (nread != np)
      fprintf(stderr, "readb - error in freadl, np=%d, nread=%d\n", np,
         nread);

   fclose(fp);
}

static FILE *fopene(file, mode)
const char *file;
const char *mode;
{
   FILE *rv;

   rv = fopen(file, mode);
   if (rv != NULL) return rv;
   fprintf(stderr, "Unable to open file '%s'", file);
   if (*mode == 'r') fprintf(stderr, " for reading");
   else if (*mode == 'w') fprintf(stderr, " for writing");
   else if (*mode == 'a') fprintf(stderr, " for appending");
   fputc('\n', stderr);
   exit(1);
}
