/*
	  Functions for handling the Base Pair Probability Matrix
		      Peter F Stadler, Ivo L Hofacker
			    Vienna RNA Package
*/
/* Last changed Time-stamp: <2002-11-07 12:46:16 ivo> */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include "dist_vars.h"
#include "fold_vars.h"
#include "part_func.h"
#include "utils.h"
#include "profiledist.h"

/*@unused@*/
static char rcsid[] = "$Id: ProfileDist.c,v 1.6 2002/11/07 11:49:59 ivo Exp $";

PRIVATE int *alignment[2];

PRIVATE void    sprint_aligned_bppm(const float *T1, const float *T2);
PRIVATE double  PrfEditCost(int i, int j, const float *T1, const float *T2);
PRIVATE double  average(double x, double y);

/*---------------------------------------------------------------------------*/

PUBLIC float profile_edit_distance(const float *T1, const float *T2)
{
  /* align the 2 probability profiles T1, T2 */
  /* This is like a Needleman-Wunsch alignment,
     we should really use affine gap-costs ala Gotoh */

  float    **distance;
  short    **i_point, **j_point;

  int           i, j, i1, j1, pos, length1,length2;
  float         minus, plus, change, temp;

  length1 = (int) T1[0];
  length2 = (int) T2[0];
  distance = (float **)     space((length1 +1)*sizeof(float *));
  if(edit_backtrack){
    i_point  = (short **)  space((length1 +1)*sizeof(short *));
    j_point  = (short **)  space((length1 +1)*sizeof(short *));
  }
  for(i=0; i<= length1; i++){
    distance[i] = (float *) space( (length2+1)*sizeof(float));
    if(edit_backtrack){
      i_point[i]  = (short *) space( (length2+1)*sizeof(short));
      j_point[i]  = (short *) space( (length2+1)*sizeof(short));
    }
  }

  for(i = 1; i <= length1; i++) {
    distance[i][0] = distance[i-1][0]+PrfEditCost(i,0,T1,T2);
    if(edit_backtrack){ i_point[i][0] = (short) i-1; j_point[i][0] = 0;   }
  }
  for(j = 1; j <= length2; j++) {
    distance[0][j] = distance[0][j-1]+PrfEditCost(0,j,T1,T2);
    if(edit_backtrack){ i_point[0][j] = 0;   j_point[0][j] = (short) j-1; }
  }
  for (i = 1; i <= length1; i++) {
    for (j = 1; j <= length2 ; j++) {
      minus  = distance[i-1][j]  + PrfEditCost(i,0,T1,T2);
      plus   = distance[i][j-1]  + PrfEditCost(0,j,T1,T2);
      change = distance[i-1][j-1]+ PrfEditCost(i,j,T1,T2);

      distance[i][j] = MIN3(minus, plus, change);
      /* printf("%g ", distance[i][j]); */

      if(edit_backtrack){
	if(distance[i][j] == change) {
	  i_point[i][j]= (short)i-1; j_point[i][j]= (short) j-1;  }
	else if(distance[i][j] == plus) {
	  i_point[i][j]= (short)i  ; j_point[i][j]= (short)j-1;  }
	else {
	  i_point[i][j]= (short)i-1; j_point[i][j]= (short)j  ;  }
      }
    }
    /* printf("\n"); */
  }
  /* printf("\n"); */
  temp = distance[length1][length2];
  for(i=0;i<=length1;i++)
    free(distance[i]);
  free(distance);

  if(edit_backtrack){
    alignment[0] = (int *) space((length1+length2+1)*sizeof(int));
    alignment[1] = (int *) space((length1+length2+1)*sizeof(int));

    pos = length1+length2;
    i   = length1;
    j   = length2;
    while( (i>0)||(j>0) ) {
      i1 = i_point[i][j];
      j1 = j_point[i][j];
      if( ((i-i1)==1)&&((j-j1)==1) )  {  /* substitution    */
	alignment[0][pos] = i;
	alignment[1][pos] = j;
      }
      if( ((i-i1)==1)&&(j==j1) )      {  /* Deletion in [1] */
	alignment[0][pos] = i;
	alignment[1][pos] = 0;
      }
      if( (i==i1)&&((j-j1)==1)  )      {  /* Deletion in [0] */
	alignment[0][pos] = 0;
	alignment[1][pos] = j;
      }
      pos--;
      i = i1;
      j = j1;
    }
    for(i=pos+1; i<=length1+length2; i++){
      alignment[0][i-pos] = alignment[0][i];
      alignment[1][i-pos] = alignment[1][i];
    }
    alignment[0][0] = length1+length2-pos;   /* length of alignment */

    for(i=0; i<=length1; i++){
      free(i_point[i]); free(j_point[i]);
    }
    free(i_point); free(j_point);
    sprint_aligned_bppm(T1,T2);
    free(alignment[0]);
    free(alignment[1]);
  }

  return temp;
}


/*---------------------------------------------------------------------------*/

PRIVATE double PrfEditCost(int i, int j, const float *T1, const float *T2)
{
  double  dist;
  int    k, kmax;

  kmax = (int) T1[1];
  if ((int) T2[1] != kmax) nrerror("inconsistent Profiles in PrfEditCost");

  if(i==0) {
    for(dist = 0. ,k=0 ; k<kmax ; k++)
      dist += T2[j*kmax+k];
  }
  if(j==0) {
    for(dist = 0. ,k=0 ; k<kmax ; k++)
      dist += T1[i*kmax+k];
  }
  if((i>0)&&(j>0)) {
    for(dist = 2.,k=0; k<kmax; k++)
      dist -= 2.*average(T1[i*kmax+k],T2[j*kmax+k]);
  }
  return dist;
}

/*---------------------------------------------------------------------------*/

PRIVATE double average(double x, double y)

/* can be essentially anything that fulfils :
   1.)     a(x,y)  =  a(y,x)
   2.)     a(x,y) >=  0       for 0<= x,y <= 1
   3.)     a(x,y) <=  (x+y)/2
   4.)     a(x,x) >=  a(x,y)  for 0<= x,y <= 1
   As in Bonhoeffer et al (1993) 'RNA Multi Structure Landscapes',
   Eur. Biophys. J. 22: 13-24 we have chosen  the geometric mean.
*/

{
    float a;
    a = (float) sqrt(x*y);
    return a;
}

/*---------------------------------------------------------------------------*/

PUBLIC float *Make_bp_profile_bppm(FLT_OR_DBL *bppm, int length){
   int i,j;
   int L=3;
   float *P; /* P[i*3+0] unpaired, P[i*3+1] upstream, P[i*3+2] downstream p */
   int *index = get_iindx((unsigned) length);

   P =  (float *) space((length+1)*3*sizeof(float));
   /* indices start at 1 use first entries to store length and dimension */
   P[0] = (float) length;
   P[1] = (float) L;

   for( i=1; i<length; i++)
     for( j=i+1; j<=length; j++ ) {
       P[i*L+1] += bppm[index[i]-j];
       P[j*L+2] += bppm[index[i]-j];
     }
   for( i=1; i<=length; i++)
     P[i*3+0] = 1 - P[i*3+1] - P[i*3+2];

   free(index);

   return (float *) P;
}

/*---------------------------------------------------------------------------*/

PRIVATE void sprint_aligned_bppm(const float *T1, const float *T2)
{
   int     i, length;
   length = alignment[0][0];
   aligned_line[0] = (char *) space((length+1)*sizeof(char));
   aligned_line[1] = (char *) space((length+1)*sizeof(char));
   for(i=1; i<=length; i++){
      if(alignment[0][i] ==0) aligned_line[0][i-1] = '_';
      else { aligned_line[0][i-1] = bppm_symbol(T1+alignment[0][i]*3); }
      if(alignment[1][i] ==0) aligned_line[1][i-1] = '_';
      else { aligned_line[1][i-1] = bppm_symbol(T2+alignment[1][i]*3); }
   }
}

/*---------------------------------------------------------------------------*/

PUBLIC void print_bppm(const float *T)
{
   int i;
   for(i=1; i<=( (int)T[0]); i++)
      printf("%c",bppm_symbol(T+i*3));
   printf("\n");
}

/*---------------------------------------------------------------------------*/

PUBLIC void     free_profile(float *T)
{
   free(T);
}

/*###########################################*/
/*# deprecated functions below              #*/
/*###########################################*/

PUBLIC float *Make_bp_profile(int length){
   return Make_bp_profile_bppm(pr, length);
}


