/* Last changed Time-stamp: <2006-03-02 22:48:15 ivo> */
/*
                  Local version of RNAalifold

                  c Ivo L Hofacker, Stephan Bernhart
                  Vienna RNA package
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <unistd.h>
#include <string.h>
#include "fold.h"
#include "part_func.h"
#include "fold_vars.h"
#include "PS_dot.h"
#include "utils.h"
#include "pair_mat.h"
#include "alifold.h"
#include "Lfold.h"
#include "aln_util.h"
#include "read_epars.h"
#include "RNALalifold_cmdl.h"

/*@unused@*/
static const char rcsid[] = "$Id: RNALalifold.c,v 1.1 2007/06/23 09:52:29 ivo Exp $";

/*@exits@*/
PRIVATE void  usage(void);
PRIVATE char  *annote(const char *structure, const char *AS[]);
PRIVATE void  print_pi(const pair_info pi, FILE *file);
PRIVATE cpair *make_color_pinfo(const pair_info *pi);
PRIVATE cpair *make_color_pinfo2(char **sequences, plist *pl, int n_seq);

#define MAX_NUM_NAMES    500

int main(int argc, char *argv[]){
  struct        RNALalifold_args_info args_info;
  char          *string, *structure, *ParamFile, *ns_bases, *c;
  char          ffname[FILENAME_MAX_LENGTH], gfname[FILENAME_MAX_LENGTH], fname[FILENAME_MAX_LENGTH];
  int           n_seq, i, length, sym, r, maxdist, unchangednc, unchangedcv;
  int           mis, pf, istty;
  float         cutoff;
  double        min_en, real_en, sfact;
  char          *AS[MAX_NUM_NAMES];          /* aligned sequences */
  char          *names[MAX_NUM_NAMES];       /* sequence names */
  FILE          *clust_file = stdin;

  string = structure = ParamFile = ns_bases = NULL;
  mis = pf      = 0;
  maxdist       = 70;
  do_backtrack  = unchangednc = unchangedcv = 1;
  dangles       = 2;
  sfact         = 1.07;
  cutoff        = 0.0005;
  ribo          = 0;
  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNALalifold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
  /* temperature */
  if(args_info.temp_given)        temperature = args_info.temp_arg;
  /* structure constraint */
  if(args_info.noTetra_given)     tetra_loop=0;
  /* set dangle model */
  if(args_info.dangles_given)     dangles = args_info.dangles_arg;
  /* do not allow weak pairs */
  if(args_info.noLP_given)        noLonelyPairs = 1;
  /* do not allow wobble pairs (GU) */
  if(args_info.noGU_given)        noGU = 1;
  /* do not allow weak closing pairs (AU,GU) */
  if(args_info.noClosingGU_given) no_closingGU = 1;
  /* set energy model */
  if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
  /* take another energy parameter set */
  if(args_info.paramFile_given)   ParamFile = strdup(args_info.paramFile_arg);
  /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
  if(args_info.nsp_given)         ns_bases = strdup(args_info.nsp_arg);
  /* set pf scaling factor */
  if(args_info.pfScale_given)     sfact = args_info.pfScale_arg;
  /* partition function settings */
  if(args_info.partfunc_given){
    pf = 1;
    if(args_info.partfunc_arg != -1)
      do_backtrack = args_info.partfunc_arg;
  }
  /* set cfactor */
  if(args_info.cfactor_given){
    cv_fact = args_info.cfactor_arg;
    unchangedcv = 0;
  }
  /* set nfactor */
  if(args_info.nfactor_given){
    nc_fact = args_info.nfactor_arg;
    unchangednc = 0;
  }
  /* set the maximum base pair span */
  if(args_info.span_given)        maxdist = args_info.span_arg;
  /* set the pair probability cutoff */
  if(args_info.cutoff_given)      cutoff  = args_info.cutoff_arg;
  /* calculate most informative sequence */
  if(args_info.mis_given)         mis = 1;
  if(args_info.csv_given)         csv = 1;
  if(args_info.ribosum_file_given){
    RibosumFile = strdup(args_info.ribosum_file_arg);
    ribo = 1;
  }
  if(args_info.ribosum_scoring_given){
    RibosumFile = NULL;
    ribo = 1;
  }

  /* check unnamed options a.k.a. filename of input alignment */
  if(args_info.inputs_num == 1){
    clust_file = fopen(args_info.inputs[0], "r");
    if(clust_file == NULL){
      fprintf(stderr, "can't open %s\n", args_info.inputs[0]);
    }
  }
  else{
    RNALalifold_cmdline_parser_print_help();
    exit(1);
  }

  /* free allocated memory of command line data structure */
  RNALalifold_cmdline_parser_free (&args_info);

  /*
  #############################################
  # begin initializing
  #############################################
  */
  if ((ribo==1)&&(unchangednc)) nc_fact=0.5;
  if ((ribo==1)&&(unchangedcv)) cv_fact=0.6;

  if (ParamFile != NULL)
    read_parameter_file(ParamFile);

  if (ns_bases != NULL) {
    nonstandards = space(33);
    c=ns_bases;
    i=sym=0;
    if (*c=='-') {
      sym=1; c++;
    }
    while (*c!='\0') {
      if (*c!=',') {
        nonstandards[i++]=*c++;
        nonstandards[i++]=*c;
        if ((sym)&&(*c!=*(c-1))) {
          nonstandards[i++]=*c;
          nonstandards[i++]=*(c-1);
        }
      }
      c++;
    }
  }

  istty = isatty(fileno(stdout))&&isatty(fileno(stdin));

  if (istty && (clust_file == stdin)) {
    print_tty_input_seq_str("Input aligned sequences in clustalw format");
  }

  n_seq = read_clustal(clust_file, AS, names);
  if (clust_file != stdin) fclose(clust_file);
  if (n_seq==0)
    nrerror("no sequences found");

  length = (int) strlen(AS[0]);
  if (length<maxdist) {
    fprintf(stderr, "Alignment length < window size: setting L=%d\n",length);
    maxdist=length;
  }

  structure = (char *) space((unsigned) length+1);

  /*
  #############################################
  # begin calculations
  #############################################
  */
  update_fold_params();
  if(!pf)
    min_en = aliLfold((const char **) AS, structure, maxdist);
  {
    eos_debug=-1; /* shut off warnings about nonstandard pairs */
    /*   for (i=0; AS[i]!=NULL; i++)
    s += energy_of_struct(AS[i], structure);
    real_en = s/i;*/
  }
  string = (mis) ? consens_mis((const char **) AS) : consensus((const char **) AS);
  printf("%s\n%s\n", string, structure);
  /*  if (istty)
    printf("\n minimum free energy = %6.2f kcal/mol (%6.2f + %6.2f)\n",
           min_en, real_en, min_en - real_en);
  else
    printf(" (%6.2f = %6.2f + %6.2f) \n", min_en, real_en, min_en-real_en );
  */
  strcpy(ffname, "alirna.ps");
  strcpy(gfname, "alirna.g");

  /*  if (length<=2500) {
    char *A;
    A = annote(structure, (const char**) AS);
    (void) PS_rna_plot_a(string, structure, ffname, NULL, A);
    free(A);
  } else
    fprintf(stderr,"INFO: structure too long, not doing xy_plot\n");
  */
  /* {*/ /* free mfe arrays but preserve base_pair for PS_dot_plot */
  /*  struct bond  *bp;
    bp = base_pair; base_pair = space(16);
    free_alifold_arrays();  / * frees base_pair *  /
    base_pair = bp;
  }*/
  if (pf) {
    double energy, kT;
    plist *pl;
    char * mfe_struc;

    mfe_struc = strdup(structure);

    kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
    pf_scale = -1;/*exp(-(sfact*min_en)/kT/length);*/
    if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
    fflush(stdout);

    /* init_alipf_fold(length); */

    /* energy = alipfW_fold(AS, structure, &pl, maxdist, cutoff); */

    if (do_backtrack) {
      printf("%s", structure);
      /*if (!istty) printf(" [%6.2f]\n", energy);
        else */
      printf("\n");
    }
    /*if ((istty)||(!do_backtrack))
      printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
    useless!!*/
    /* printf(" frequency of mfe structure in ensemble %g\n",
       exp((energy-min_en)/kT));*/

    if (do_backtrack) {
      FILE *aliout;
      cpair *cp;
      strcpy(ffname, "alifold.out");
      aliout = fopen(ffname, "w");
      if (!aliout) {
        fprintf(stderr, "can't open %s    skipping output\n", ffname);
      } else {
        fprintf(aliout, "%d sequence; length of alignment %d\n",
                n_seq, length);
        fprintf(aliout, "alifold output\n");

        fprintf(aliout, "%s\n", structure);
      }
      strcpy(ffname, "alidotL.ps");
      cp = make_color_pinfo2(AS,pl,n_seq);
      (void) PS_color_dot_plot_turn(string, cp, ffname, maxdist);
      free(cp);
    }
    free(mfe_struc);
    free(pl);
  }
  free(base_pair);
  (void) fflush(stdout);
  free(string);
  free(structure);
  for (i=0; AS[i]; i++) {
    free(AS[i]); free(names[i]);
  }
  return 0;
}

PRIVATE void print_pi(const pair_info pi, FILE *file) {
  const char *pname[8] = {"","CG","GC","GU","UG","AU","UA", "--"};
  int i;

  /* numbering starts with 1 in output */
  fprintf(file, "%5d %5d %2d %5.1f%% %7.3f",
          pi.i, pi.j, pi.bp[0], 100.*pi.p, pi.ent);
  for (i=1; i<=7; i++)
    if (pi.bp[i]) fprintf(file, " %s:%-4d", pname[i], pi.bp[i]);
  /* if ((!pi.sym)&&(pi.j>=0)) printf(" *"); */
  if (!pi.comp) fprintf(file, " +");
  fprintf(file, "\n");
}

PRIVATE cpair *make_color_pinfo(const pair_info *pi) {
  cpair *cp;
  int i, n;
  for (n=0; pi[n].i>0; n++);
  cp = (cpair *) space(sizeof(cpair)*(n+1));
  for (i=0; i<n; i++) {
    int j, ncomp;
    cp[i].i = pi[i].i;
    cp[i].j = pi[i].j;
    cp[i].p = pi[i].p;
    for (ncomp=0, j=1; j<=6; j++) if (pi[i].bp[j]) ncomp++;
    cp[i].hue = (ncomp-1.0)/6.2;   /* hue<6/6.9 (hue=1 ==  hue=0) */
    cp[i].sat = 1 - MIN2( 1.0, pi[i].bp[0]/2.5);
    cp[i].mfe = pi[i].comp;
  }
  return cp;
}


#if 0
PRIVATE char *annote(const char *structure, const char *AS[]) {
  char *ps;
  int i, n, s, maxl;
  short *ptable;
  make_pair_matrix();
  n = strlen(AS[0]);
  maxl = 1024;
  ps = (char *) space(maxl);
  ptable = make_pair_table(structure);
  for (i=1; i<=n; i++) {
    char pps[64], ci='\0', cj='\0';
    int j, type, pfreq[8] = {0,0,0,0,0,0,0,0}, vi=0, vj=0;
    if ((j=ptable[i])<i) continue;
    for (s=0; AS[s]!=NULL; s++) {
      type = pair[encode_char(AS[s][i-1])][encode_char(AS[s][j-1])];
      pfreq[type]++;
      if (type) {
        if (AS[s][i-1] != ci) { ci = AS[s][i-1]; vi++;}
        if (AS[s][j-1] != cj) { cj = AS[s][j-1]; vj++;}
      }
    }
    if (maxl - strlen(ps) < 128) {
      maxl *= 2;
      ps = realloc(ps, maxl);
      if (ps==NULL) nrerror("out of memory in realloc");
    }
    if (pfreq[0]>0) {
      snprintf(pps, 64, "%d %d %d gmark\n", i, j, pfreq[0]);
      strcat(ps, pps);
    }
    if (vi>1) {
      snprintf(pps, 64, "%d cmark\n", i);
      strcat(ps, pps);
    }
    if (vj>1) {
      snprintf(pps, 64, "%d cmark\n", j);
      strcat(ps, pps);
    }
  }
  free(ptable);
  return ps;
}
#endif
/*-------------------------------------------------------------------------*/

PRIVATE cpair *make_color_pinfo2(char **sequences, plist *pl, int n_seq) {
  cpair *cp;
  int i, n,s, a, b,z;
  int franz[7];
  for (n=0; pl[n].i>0; n++);
  cp = (cpair *) space(sizeof(cpair)*(n+1));
  for (i=0; i<n; i++) {
    int ncomp=0;
    cp[i].i = pl[i].i;
    cp[i].j = pl[i].j;
    cp[i].p = pl[i].p;
    for (z=0; z<7; z++) franz[z]=0;
    for (s=0; s<n_seq; s++) {
      a=encode_char(toupper(sequences[s][cp[i].i-1]));
      b=encode_char(toupper(sequences[s][cp[i].j-1]));
      if ((sequences[s][cp[i].j-1]=='~')||(sequences[s][cp[i].i-1] == '~')) continue;
      franz[BP_pair[a][b]]++;
    }
    for (z=1; z<7; z++) {
      if (franz[z]>0) {
        ncomp++;
      }}
    cp[i].hue = (ncomp-1.0)/6.2;   /* hue<6/6.9 (hue=1 ==  hue=0) */
    cp[i].sat = 1 - MIN2( 1.0, franz[0]/*pi[i].bp[0]*//2.5);
    /*computation of entropy is sth for the ivo*/
    /* cp[i].mfe = pi[i].comp;  don't have that .. yet*/
  }
  return cp;
}
